summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Mark Dickinson <dickinsm@gmail.com> 2009-03-17 23:03:46 (GMT)
committer: Mark Dickinson <dickinsm@gmail.com> 2009-03-17 23:03:46 (GMT)
commit: 277859d5910782cc31bb27f2472893b8382ad391 (patch)
tree: 1992a663205d997ba06db1113284828d6a1654c0
parent: ed3558b3343d3af563829693483b28a4003711c8 (diff)
downloadcpython-277859d5910782cc31bb27f2472893b8382ad391.zip
cpython-277859d5910782cc31bb27f2472893b8382ad391.tar.gz
cpython-277859d5910782cc31bb27f2472893b8382ad391.tar.bz2
Issue #2110: Add support for thousands separator and 'n' format specifier
to Decimal __format__ method.
-rw-r--r-- Lib/decimal.py 285
-rw-r--r-- Lib/test/test_decimal.py 98
-rw-r--r-- Misc/NEWS 3
3 files changed, 302 insertions, 84 deletions
diff --git a/Lib/decimal.py b/Lib/decimal.py
index 2b16e6a..55589ad 100644
--- a/Lib/decimal.py
+++ b/Lib/decimal.py
@@ -3506,18 +3506,16 @@ class Decimal(object):
return self # My components are also immutable
return self.__class__(str(self))
- # PEP 3101 support. See also _parse_format_specifier and _format_align
- def __format__(self, specifier, context=None):
+ # PEP 3101 support. The _localeconv keyword argument should be
+ # considered private: it's provided for ease of testing only.
+ def __format__(self, specifier, context=None, _localeconv=None):
"""Format a Decimal instance according to the given specifier.
The specifier should be a standard format specifier, with the
form described in PEP 3101. Formatting types 'e', 'E', 'f',
- 'F', 'g', 'G', and '%' are supported. If the formatting type
- is omitted it defaults to 'g' or 'G', depending on the value
- of context.capitals.
-
- At this time the 'n' format specifier type (which is supposed
- to use the current locale) is not supported.
+ 'F', 'g', 'G', 'n' and '%' are supported. If the formatting
+ type is omitted it defaults to 'g' or 'G', depending on the
+ value of context.capitals.
"""
# Note: PEP 3101 says that if the type is not present then
@@ -3528,17 +3526,20 @@ class Decimal(object):
if context is None:
context = getcontext()
- spec = _parse_format_specifier(specifier)
+ spec = _parse_format_specifier(specifier, _localeconv=_localeconv)
- # special values don't care about the type or precision...
+ # special values don't care about the type or precision
if self._is_special:
- return _format_align(str(self), spec)
+ sign = _format_sign(self._sign, spec)
+ body = str(self.copy_abs())
+ return _format_align(sign, body, spec)
# a type of None defaults to 'g' or 'G', depending on context
- # if type is '%', adjust exponent of self accordingly
if spec['type'] is None:
spec['type'] = ['g', 'G'][context.capitals]
- elif spec['type'] == '%':
+
+ # if type is '%', adjust exponent of self accordingly
+ if spec['type'] == '%':
self = _dec_from_triple(self._sign, self._int, self._exp+2)
# round if necessary, taking rounding mode from the context
@@ -3547,53 +3548,45 @@ class Decimal(object):
if precision is not None:
if spec['type'] in 'eE':
self = self._round(precision+1, rounding)
- elif spec['type'] in 'gG':
- if len(self._int) > precision:
- self = self._round(precision, rounding)
elif spec['type'] in 'fF%':
self = self._rescale(-precision, rounding)
+ elif spec['type'] in 'gG' and len(self._int) > precision:
+ self = self._round(precision, rounding)
# special case: zeros with a positive exponent can't be
# represented in fixed point; rescale them to 0e0.
- elif not self and self._exp > 0 and spec['type'] in 'fF%':
+ if not self and self._exp > 0 and spec['type'] in 'fF%':
self = self._rescale(0, rounding)
# figure out placement of the decimal point
leftdigits = self._exp + len(self._int)
- if spec['type'] in 'fF%':
- dotplace = leftdigits
- elif spec['type'] in 'eE':
+ if spec['type'] in 'eE':
if not self and precision is not None:
dotplace = 1 - precision
else:
dotplace = 1
+ elif spec['type'] in 'fF%':
+ dotplace = leftdigits
elif spec['type'] in 'gG':
if self._exp <= 0 and leftdigits > -6:
dotplace = leftdigits
else:
dotplace = 1
- # figure out main part of numeric string...
- if dotplace <= 0:
- num = '0.' + '0'*(-dotplace) + self._int
- elif dotplace >= len(self._int):
- # make sure we're not padding a '0' with extra zeros on the right
- assert dotplace==len(self._int) or self._int != '0'
- num = self._int + '0'*(dotplace-len(self._int))
+ # find digits before and after decimal point, and get exponent
+ if dotplace < 0:
+ intpart = '0'
+ fracpart = '0'*(-dotplace) + self._int
+ elif dotplace > len(self._int):
+ intpart = self._int + '0'*(dotplace-len(self._int))
+ fracpart = ''
else:
- num = self._int[:dotplace] + '.' + self._int[dotplace:]
-
- # ...then the trailing exponent, or trailing '%'
- if leftdigits != dotplace or spec['type'] in 'eE':
- echar = {'E': 'E', 'e': 'e', 'G': 'E', 'g': 'e'}[spec['type']]
- num = num + "{0}{1:+}".format(echar, leftdigits-dotplace)
- elif spec['type'] == '%':
- num = num + '%'
-
- # add sign
- if self._sign == 1:
- num = '-' + num
- return _format_align(num, spec)
+ intpart = self._int[:dotplace] or '0'
+ fracpart = self._int[dotplace:]
+ exp = leftdigits-dotplace
+ # done with the decimal-specific stuff; hand over the rest
+ # of the formatting to the _format_number function
+ return _format_number(self._sign, intpart, fracpart, exp, spec)
def _dec_from_triple(sign, coefficient, exponent, special=False):
"""Create a decimal instance directly, without any validation,
@@ -5437,14 +5430,13 @@ _all_zeros = re.compile('0*$').match
_exact_half = re.compile('50*$').match
##### PEP3101 support functions ##############################################
-# The functions parse_format_specifier and format_align have little to do
-# with the Decimal class, and could potentially be reused for other pure
+# The functions in this section have little to do with the Decimal
+# class, and could potentially be reused or adapted for other pure
# Python numeric classes that want to implement __format__
#
# A format specifier for Decimal looks like:
#
-# [[fill]align][sign][0][minimumwidth][.precision][type]
-#
+# [[fill]align][sign][0][minimumwidth][,][.precision][type]
_parse_format_specifier_regex = re.compile(r"""\A
(?:
@@ -5454,14 +5446,23 @@ _parse_format_specifier_regex = re.compile(r"""\A
(?P<sign>[-+ ])?
(?P<zeropad>0)?
(?P<minimumwidth>(?!0)\d+)?
+(?P<thousands_sep>,)?
(?:\.(?P<precision>0|(?!0)\d+))?
-(?P<type>[eEfFgG%])?
+(?P<type>[eEfFgGn%])?
\Z
""", re.VERBOSE)
del re
-def _parse_format_specifier(format_spec):
+# The locale module is only needed for the 'n' format specifier. The
+# rest of the PEP 3101 code functions quite happily without it, so we
+# don't care too much if locale isn't present.
+try:
+ import locale as _locale
+except ImportError:
+ pass
+
+def _parse_format_specifier(format_spec, _localeconv=None):
"""Parse and validate a format specifier.
Turns a standard numeric format specifier into a dict, with the
@@ -5471,9 +5472,14 @@ def _parse_format_specifier(format_spec):
align: alignment type, either '<', '>', '=' or '^'
sign: either '+', '-' or ' '
minimumwidth: nonnegative integer giving minimum width
+ zeropad: boolean, indicating whether to pad with zeros
+ thousands_sep: string to use as thousands separator, or ''
+ grouping: grouping for thousands separators, in format
+ used by localeconv
+ decimal_point: string to use for decimal point
precision: nonnegative integer giving precision, or None
type: one of the characters 'eEfFgG%', or None
- unicode: either True or False (always True for Python 3.x)
+ unicode: boolean (always True for Python 3.x)
"""
m = _parse_format_specifier_regex.match(format_spec)
@@ -5483,26 +5489,25 @@ def _parse_format_specifier(format_spec):
# get the dictionary
format_dict = m.groupdict()
- # defaults for fill and alignment
+ # zeropad; defaults for fill and alignment. If zero padding
+ # is requested, the fill and align fields should be absent.
fill = format_dict['fill']
align = format_dict['align']
- if format_dict.pop('zeropad') is not None:
- # in the face of conflict, refuse the temptation to guess
- if fill is not None and fill != '0':
+ format_dict['zeropad'] = (format_dict['zeropad'] is not None)
+ if format_dict['zeropad']:
+ if fill is not None:
raise ValueError("Fill character conflicts with '0'"
" in format specifier: " + format_spec)
- if align is not None and align != '=':
+ if align is not None:
raise ValueError("Alignment conflicts with '0' in "
"format specifier: " + format_spec)
- fill = '0'
- align = '='
format_dict['fill'] = fill or ' '
format_dict['align'] = align or '<'
+ # default sign handling: '-' for negative, '' for positive
if format_dict['sign'] is None:
format_dict['sign'] = '-'
- # turn minimumwidth and precision entries into integers.
# minimumwidth defaults to 0; precision remains None if not given
format_dict['minimumwidth'] = int(format_dict['minimumwidth'] or '0')
if format_dict['precision'] is not None:
@@ -5514,58 +5519,172 @@ def _parse_format_specifier(format_spec):
if format_dict['type'] in 'gG' or format_dict['type'] is None:
format_dict['precision'] = 1
+ # determine thousands separator, grouping, and decimal separator, and
+ # add appropriate entries to format_dict
+ if format_dict['type'] == 'n':
+ # apart from separators, 'n' behaves just like 'g'
+ format_dict['type'] = 'g'
+ if _localeconv is None:
+ _localeconv = _locale.localeconv()
+ if format_dict['thousands_sep'] is not None:
+ raise ValueError("Explicit thousands separator conflicts with "
+ "'n' type in format specifier: " + format_spec)
+ format_dict['thousands_sep'] = _localeconv['thousands_sep']
+ format_dict['grouping'] = _localeconv['grouping']
+ format_dict['decimal_point'] = _localeconv['decimal_point']
+ else:
+ if format_dict['thousands_sep'] is None:
+ format_dict['thousands_sep'] = ''
+ format_dict['grouping'] = [3, 0]
+ format_dict['decimal_point'] = '.'
+
# record whether return type should be str or unicode
format_dict['unicode'] = isinstance(format_spec, unicode)
return format_dict
-def _format_align(body, spec_dict):
- """Given an unpadded, non-aligned numeric string, add padding and
- aligment to conform with the given format specifier dictionary (as
- output from parse_format_specifier).
+def _format_align(sign, body, spec):
+ """Given an unpadded, non-aligned numeric string 'body' and sign
+ string 'sign', add padding and alignment conforming to the given
+ format specifier dictionary 'spec' (as produced by
+ _parse_format_specifier).
- It's assumed that if body is negative then it starts with '-'.
- Any leading sign ('-' or '+') is stripped from the body before
- applying the alignment and padding rules, and replaced in the
- appropriate position.
+ Also converts result to unicode if necessary.
"""
- # figure out the sign; we only examine the first character, so if
- # body has leading whitespace the results may be surprising.
- if len(body) > 0 and body[0] in '-+':
- sign = body[0]
- body = body[1:]
- else:
- sign = ''
-
- if sign != '-':
- if spec_dict['sign'] in ' +':
- sign = spec_dict['sign']
- else:
- sign = ''
-
# how much extra space do we have to play with?
- minimumwidth = spec_dict['minimumwidth']
- fill = spec_dict['fill']
- padding = fill*(max(minimumwidth - (len(sign+body)), 0))
+ minimumwidth = spec['minimumwidth']
+ fill = spec['fill']
+ padding = fill*(minimumwidth - len(sign) - len(body))
- align = spec_dict['align']
+ align = spec['align']
if align == '<':
result = sign + body + padding
elif align == '>':
result = padding + sign + body
elif align == '=':
result = sign + padding + body
- else: #align == '^'
+ elif align == '^':
half = len(padding)//2
result = padding[:half] + sign + body + padding[half:]
+ else:
+ raise ValueError('Unrecognised alignment field')
# make sure that result is unicode if necessary
- if spec_dict['unicode']:
+ if spec['unicode']:
result = unicode(result)
return result
+def _group_lengths(grouping):
+ """Convert a localeconv-style grouping into a (possibly infinite)
+ iterable of integers representing group lengths.
+
+ """
+ # The result from localeconv()['grouping'], and the input to this
+ # function, should be a list of integers in one of the
+ # following three forms:
+ #
+ # (1) an empty list, or
+ # (2) nonempty list of positive integers + [0], or
+ # (3) list of positive integers + [locale.CHAR_MAX]
+
+ from itertools import chain, repeat
+ if not grouping:
+ return []
+ elif grouping[-1] == 0 and len(grouping) >= 2:
+ return chain(grouping[:-1], repeat(grouping[-2]))
+ elif grouping[-1] == _locale.CHAR_MAX:
+ return grouping[:-1]
+ else:
+ raise ValueError('unrecognised format for grouping')
+
+def _insert_thousands_sep(digits, spec, min_width=1):
+ """Insert thousands separators into a digit string.
+
+ spec is a dictionary whose keys should include 'thousands_sep' and
+ 'grouping'; typically it's the result of parsing the format
+ specifier using _parse_format_specifier.
+
+ The min_width keyword argument gives the minimum length of the
+ result, which will be padded on the left with zeros if necessary.
+
+ If necessary, the zero padding adds an extra '0' on the left to
+ avoid a leading thousands separator. For example, inserting
+ commas every three digits in '123456', with min_width=8, gives
+ '0,123,456', even though that has length 9.
+
+ """
+
+ sep = spec['thousands_sep']
+ grouping = spec['grouping']
+
+ groups = []
+ for l in _group_lengths(grouping):
+ if groups:
+ min_width -= len(sep)
+ if l <= 0:
+ raise ValueError("group length should be positive")
+ # max(..., 1) forces at least 1 digit to the left of a separator
+ l = min(max(len(digits), min_width, 1), l)
+ groups.append('0'*(l - len(digits)) + digits[-l:])
+ digits = digits[:-l]
+ min_width -= l
+ if not digits and min_width <= 0:
+ break
+ else:
+ l = max(len(digits), min_width, 1)
+ groups.append('0'*(l - len(digits)) + digits[-l:])
+ return sep.join(reversed(groups))
+
+def _format_sign(is_negative, spec):
+ """Determine sign character."""
+
+ if is_negative:
+ return '-'
+ elif spec['sign'] in ' +':
+ return spec['sign']
+ else:
+ return ''
+
+def _format_number(is_negative, intpart, fracpart, exp, spec):
+ """Format a number, given the following data:
+
+ is_negative: true if the number is negative, else false
+ intpart: string of digits that must appear before the decimal point
+ fracpart: string of digits that must come after the point
+ exp: exponent, as an integer
+ spec: dictionary resulting from parsing the format specifier
+
+ This function uses the information in spec to:
+ insert separators (decimal separator and thousands separators)
+ format the sign
+ format the exponent
+ add trailing '%' for the '%' type
+ zero-pad if necessary
+ fill and align if necessary
+ """
+
+ sign = _format_sign(is_negative, spec)
+
+ if fracpart:
+ fracpart = spec['decimal_point'] + fracpart
+
+ if exp != 0 or spec['type'] in 'eE':
+ echar = {'E': 'E', 'e': 'e', 'G': 'E', 'g': 'e'}[spec['type']]
+ fracpart += "{0}{1:+}".format(echar, exp)
+ if spec['type'] == '%':
+ fracpart += '%'
+
+ if spec['zeropad']:
+ min_width = spec['minimumwidth'] - len(fracpart) - len(sign)
+ else:
+ min_width = 0
+ intpart = _insert_thousands_sep(intpart, spec, min_width)
+
+ return _format_align(sign, intpart+fracpart, spec)
+
+
##### Useful Constants (internal use only) ################################
# Reusable defaults
diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py
index eed1eed..6c071f1 100644
--- a/Lib/test/test_decimal.py
+++ b/Lib/test/test_decimal.py
@@ -616,6 +616,7 @@ class DecimalImplicitConstructionTest(unittest.TestCase):
self.assertEqual(eval('Decimal(10)' + sym + 'E()'),
'10' + rop + 'str')
+
class DecimalFormatTest(unittest.TestCase):
'''Unit tests for the format function.'''
def test_formatting(self):
@@ -705,15 +706,110 @@ class DecimalFormatTest(unittest.TestCase):
('', '1.00', '1.00'),
- # check alignment
+ # test alignment and padding
('<6', '123', '123   '),
('>6', '123', '   123'),
('^6', '123', ' 123  '),
('=+6', '123', '+  123'),
+ ('#<10', 'NaN', 'NaN#######'),
+ ('#<10', '-4.3', '-4.3######'),
+ ('#<+10', '0.0130', '+0.0130###'),
+ ('#< 10', '0.0130', ' 0.0130###'),
+ ('@>10', '-Inf', '@-Infinity'),
+ ('#>5', '-Inf', '-Infinity'),
+ ('?^5', '123', '?123?'),
+ ('%^6', '123', '%123%%'),
+ (' ^6', '-45.6', '-45.6 '),
+ ('/=10', '-45.6', '-/////45.6'),
+ ('/=+10', '45.6', '+/////45.6'),
+ ('/= 10', '45.6', ' /////45.6'),
+
+ # thousands separator
+ (',', '1234567', '1,234,567'),
+ (',', '123456', '123,456'),
+ (',', '12345', '12,345'),
+ (',', '1234', '1,234'),
+ (',', '123', '123'),
+ (',', '12', '12'),
+ (',', '1', '1'),
+ (',', '0', '0'),
+ (',', '-1234567', '-1,234,567'),
+ (',', '-123456', '-123,456'),
+ ('7,', '123456', '123,456'),
+ ('8,', '123456', '123,456 '),
+ ('08,', '123456', '0,123,456'), # special case: extra 0 needed
+ ('+08,', '123456', '+123,456'), # but not if there's a sign
+ (' 08,', '123456', ' 123,456'),
+ ('08,', '-123456', '-123,456'),
+ ('+09,', '123456', '+0,123,456'),
+ # ... with fractional part...
+ ('07,', '1234.56', '1,234.56'),
+ ('08,', '1234.56', '1,234.56'),
+ ('09,', '1234.56', '01,234.56'),
+ ('010,', '1234.56', '001,234.56'),
+ ('011,', '1234.56', '0,001,234.56'),
+ ('012,', '1234.56', '0,001,234.56'),
+ ('08,.1f', '1234.5', '01,234.5'),
+ # no thousands separators in fraction part
+ (',', '1.23456789', '1.23456789'),
+ (',%', '123.456789', '12,345.6789%'),
+ (',e', '123456', '1.23456e+5'),
+ (',E', '123456', '1.23456E+5'),
]
for fmt, d, result in test_values:
self.assertEqual(format(Decimal(d), fmt), result)
+ def test_n_format(self):
+ try:
+ from locale import CHAR_MAX
+ except ImportError:
+ return
+
+ # Set up some localeconv-like dictionaries
+ en_US = {
+ 'decimal_point' : '.',
+ 'grouping' : [3, 3, 0],
+ 'thousands_sep': ','
+ }
+
+ fr_FR = {
+ 'decimal_point' : ',',
+ 'grouping' : [CHAR_MAX],
+ 'thousands_sep' : ''
+ }
+
+ ru_RU = {
+ 'decimal_point' : ',',
+ 'grouping' : [3, 3, 0],
+ 'thousands_sep' : ' '
+ }
+
+ crazy = {
+ 'decimal_point' : '&',
+ 'grouping' : [1, 4, 2, CHAR_MAX],
+ 'thousands_sep' : '-'
+ }
+
+
+ def get_fmt(x, locale, fmt='n'):
+ return Decimal.__format__(Decimal(x), fmt, _localeconv=locale)
+
+ self.assertEqual(get_fmt(Decimal('12.7'), en_US), '12.7')
+ self.assertEqual(get_fmt(Decimal('12.7'), fr_FR), '12,7')
+ self.assertEqual(get_fmt(Decimal('12.7'), ru_RU), '12,7')
+ self.assertEqual(get_fmt(Decimal('12.7'), crazy), '1-2&7')
+
+ self.assertEqual(get_fmt(123456789, en_US), '123,456,789')
+ self.assertEqual(get_fmt(123456789, fr_FR), '123456789')
+ self.assertEqual(get_fmt(123456789, ru_RU), '123 456 789')
+ self.assertEqual(get_fmt(1234567890123, crazy), '123456-78-9012-3')
+
+ self.assertEqual(get_fmt(123456789, en_US, '.6n'), '1.23457e+8')
+ self.assertEqual(get_fmt(123456789, fr_FR, '.6n'), '1,23457e+8')
+ self.assertEqual(get_fmt(123456789, ru_RU, '.6n'), '1,23457e+8')
+ self.assertEqual(get_fmt(123456789, crazy, '.6n'), '1&23457e+8')
+
+
class DecimalArithmeticOperatorsTest(unittest.TestCase):
'''Unit tests for all arithmetic operators, binary and unary.'''
diff --git a/Misc/NEWS b/Misc/NEWS
index c506071..c59ba94 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -174,6 +174,9 @@ Core and Builtins
Library
-------
+- Issue #2110: Add support for thousands separator and 'n' type
+ specifier to Decimal.__format__
+
- Fix Decimal.__format__ bug that swapped the meanings of the '<' and
'>' alignment characters.