From 0923d1d8d7e428297461ed5145f06915c462b25b Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Thu, 16 Apr 2009 20:16:10 +0000 Subject: The other half of Issue #1580: use short float repr where possible. Addresses the float -> string conversion, using David Gay's code which was added in Mark Dickinson's checkin r71663. Also addresses these, which are intertwined with the short repr changes: - Issue #5772: format(1e100, '<') produces '1e+100', not '1.0e+100' - Issue #5515: 'n' formatting with commas no longer works poorly with leading zeros. - PEP 378 Format Specifier for Thousands Separator: implemented for floats. --- Include/bytesobject.h | 26 +- Include/pystrtod.h | 19 ++ Include/unicodeobject.h | 26 +- Lib/test/test_float.py | 76 +++++ Lib/test/test_format.py | 5 + Lib/test/test_types.py | 49 ++- Misc/NEWS | 9 + Modules/_pickle.c | 31 +- Objects/bytesobject.c | 1 + Objects/complexobject.c | 167 ++++++---- Objects/floatobject.c | 145 ++------- Objects/stringlib/formatter.h | 718 ++++++++++++++++++++++++----------------- Objects/stringlib/localeutil.h | 269 ++++++++------- Objects/unicodeobject.c | 170 ++-------- Python/marshal.c | 26 +- Python/pystrtod.c | 596 ++++++++++++++++++++++++++++++---- 16 files changed, 1497 insertions(+), 836 deletions(-) diff --git a/Include/bytesobject.h b/Include/bytesobject.h index f5a5085..916e3f7 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -91,24 +91,22 @@ PyAPI_FUNC(int) PyBytes_AsStringAndSize( into the string pointed to by buffer. 
For the argument descriptions, see Objects/stringlib/localeutil.h */ -PyAPI_FUNC(int) _PyBytes_InsertThousandsGroupingLocale(char *buffer, - Py_ssize_t n_buffer, - Py_ssize_t n_digits, - Py_ssize_t buf_size, - Py_ssize_t *count, - int append_zero_char); +PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGroupingLocale(char *buffer, + Py_ssize_t n_buffer, + char *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width); /* Using explicit passed-in values, insert the thousands grouping into the string pointed to by buffer. For the argument descriptions, see Objects/stringlib/localeutil.h */ -PyAPI_FUNC(int) _PyBytes_InsertThousandsGrouping(char *buffer, - Py_ssize_t n_buffer, - Py_ssize_t n_digits, - Py_ssize_t buf_size, - Py_ssize_t *count, - int append_zero_char, - const char *grouping, - const char *thousands_sep); +PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer, + Py_ssize_t n_buffer, + char *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + const char *thousands_sep); /* Flags used by string formatting */ #define F_LJUST (1<<0) diff --git a/Include/pystrtod.h b/Include/pystrtod.h index c6921da..1caa7ae 100644 --- a/Include/pystrtod.h +++ b/Include/pystrtod.h @@ -10,6 +10,25 @@ PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr); PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, const char *format, double d); +/* The caller is responsible for calling PyMem_Free to free the buffer + that's is returned. */ +PyAPI_FUNC(char *) PyOS_double_to_string(double val, + char format_code, + int precision, + int flags, + int *type); + + +/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */ +#define Py_DTSF_SIGN 0x01 /* always add the sign */ +#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */ +#define Py_DTSF_ALT 0x04 /* "alternate" formatting. 
it's format_code + specific */ + +/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */ +#define Py_DTST_FINITE 0 +#define Py_DTST_INFINITE 1 +#define Py_DTST_NAN 2 #ifdef __cplusplus } diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 98c0372..696c1c7 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -1482,24 +1482,22 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( into the string pointed to by buffer. For the argument descriptions, see Objects/stringlib/localeutil.h */ -PyAPI_FUNC(int) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer, - Py_ssize_t n_buffer, - Py_ssize_t n_digits, - Py_ssize_t buf_size, - Py_ssize_t *count, - int append_zero_char); +PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer, + Py_ssize_t n_buffer, + Py_UNICODE *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width); /* Using explicit passed-in values, insert the thousands grouping into the string pointed to by buffer. For the argument descriptions, see Objects/stringlib/localeutil.h */ -PyAPI_FUNC(int) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer, - Py_ssize_t n_buffer, - Py_ssize_t n_digits, - Py_ssize_t buf_size, - Py_ssize_t *count, - int append_zero_char, - const char *grouping, - const char *thousands_sep); +PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer, + Py_ssize_t n_buffer, + Py_UNICODE *digits, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + const char *thousands_sep); /* === Characters Type APIs =============================================== */ /* Helper array used by Py_UNICODE_ISSPACE(). 
*/ diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 123a69d..8c250e8 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -1,6 +1,7 @@ import unittest, struct import os +import sys from test import support import math from math import isinf, isnan, copysign, ldexp @@ -10,6 +11,10 @@ import random, fractions INF = float("inf") NAN = float("nan") +#locate file with float format test values +test_dir = os.path.dirname(__file__) or os.curdir +format_testfile = os.path.join(test_dir, 'formatfloat_testcases.txt') + class GeneralFloatCases(unittest.TestCase): def test_float(self): @@ -24,6 +29,10 @@ class GeneralFloatCases(unittest.TestCase): self.assertRaises(ValueError, float, "+-3.14") self.assertRaises(ValueError, float, "-+3.14") self.assertRaises(ValueError, float, "--3.14") + self.assertRaises(ValueError, float, ".nan") + self.assertRaises(ValueError, float, "+.inf") + self.assertRaises(ValueError, float, ".") + self.assertRaises(ValueError, float, "-.") self.assertEqual(float(b" \u0663.\u0661\u0664 ".decode('raw-unicode-escape')), 3.14) @support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE') @@ -316,6 +325,73 @@ class ReprTestCase(unittest.TestCase): self.assertEqual(v, eval(repr(v))) floats_file.close() +class FormatTestCase(unittest.TestCase): + @unittest.skipUnless(float.__getformat__("double").startswith("IEEE"), + "test requires IEEE 754 doubles") + def test_format_testfile(self): + for line in open(format_testfile): + if line.startswith('--'): + continue + line = line.strip() + if not line: + continue + + lhs, rhs = map(str.strip, line.split('->')) + fmt, arg = lhs.split() + self.assertEqual(fmt % float(arg), rhs) + self.assertEqual(fmt % -float(arg), '-' + rhs) + + @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short', + "applies only when using short float repr style") + def test_short_repr(self): + # test short float repr introduced in Python 3.1. 
One aspect + # of this repr is that we get some degree of str -> float -> + # str roundtripping. In particular, for any numeric string + # containing 15 or fewer significant digits, those exact same + # digits (modulo trailing zeros) should appear in the output. + # No more repr(0.03) -> "0.029999999999999999"! + + test_strings = [ + # output always includes *either* a decimal point and at + # least one digit after that point, or an exponent. + '0.0', + '1.0', + '0.01', + '0.02', + '0.03', + '0.04', + '0.05', + '1.23456789', + '10.0', + '100.0', + # values >= 1e16 get an exponent... + '1000000000000000.0', + '9999999999999990.0', + '1e+16', + '1e+17', + # ... and so do values < 1e-4 + '0.001', + '0.001001', + '0.00010000000000001', + '0.0001', + '9.999999999999e-05', + '1e-05', + # values designed to provoke failure if the FPU rounding + # precision isn't set correctly + '8.72293771110361e+25', + '7.47005307342313e+26', + '2.86438000439698e+28', + '8.89142905246179e+28', + '3.08578087079232e+35', + ] + + for s in test_strings: + negs = '-'+s + self.assertEqual(s, repr(float(s))) + self.assertEqual(negs, repr(float(negs))) + + + # Beginning with Python 2.6 float has cross platform compatible # ways to create and represent inf and nan class InfNanTest(unittest.TestCase): diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index c90f66d..054baf6 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -220,6 +220,11 @@ class FormatTest(unittest.TestCase): testformat("%a", "\u0378", "'\\u0378'") # non printable testformat("%r", "\u0374", "'\u0374'") # printable testformat("%a", "\u0374", "'\\u0374'") # printable + + # alternate float formatting + testformat('%g', 1.1, '1.1') + testformat('%#g', 1.1, '1.10000') + # Test exception for unknown format characters if verbose: print('Testing exceptions') diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 0d4d1b3..5d41e1b 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py 
@@ -113,6 +113,9 @@ class TypesTests(unittest.TestCase): self.assertEqual(1.5e-101.__format__('e'), '1.500000e-101') self.assertEqual('%e' % 1.5e-101, '1.500000e-101') + self.assertEqual('%g' % 1.0, '1') + self.assertEqual('%#g' % 1.0, '1.00000') + def test_normal_integers(self): # Ensure the first 256 integers are shared a = 256 @@ -358,6 +361,8 @@ class TypesTests(unittest.TestCase): self.assertRaises(TypeError, 3 .__format__, 0) # can't have ',' with 'n' self.assertRaises(ValueError, 3 .__format__, ",n") + # can't have ',' with 'c' + self.assertRaises(ValueError, 3 .__format__, ",c") # ensure that only int and float type specifiers work for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] + @@ -547,10 +552,34 @@ class TypesTests(unittest.TestCase): # a totaly empty format specifier means something else. # So, just use a sign flag test(1e200, '+g', '+1e+200') - test(1e200, '+', '+1.0e+200') + test(1e200, '+', '+1e+200') + test(1.1e200, '+g', '+1.1e+200') test(1.1e200, '+', '+1.1e+200') + # 0 padding + test(1234., '010f', '1234.000000') + test(1234., '011f', '1234.000000') + test(1234., '012f', '01234.000000') + test(-1234., '011f', '-1234.000000') + test(-1234., '012f', '-1234.000000') + test(-1234., '013f', '-01234.000000') + test(-1234.12341234, '013f', '-01234.123412') + test(-123456.12341234, '011.2f', '-0123456.12') + + # 0 padding with commas + test(1234., '011,f', '1,234.000000') + test(1234., '012,f', '1,234.000000') + test(1234., '013,f', '01,234.000000') + test(-1234., '012,f', '-1,234.000000') + test(-1234., '013,f', '-1,234.000000') + test(-1234., '014,f', '-01,234.000000') + test(-12345., '015,f', '-012,345.000000') + test(-123456., '016,f', '-0,123,456.000000') + test(-123456., '017,f', '-0,123,456.000000') + test(-123456.12341234, '017,f', '-0,123,456.123412') + test(-123456.12341234, '013,.2f', '-0,123,456.12') + # % formatting test(-1.0, '%', '-100.000000%') @@ -575,6 +604,24 @@ class TypesTests(unittest.TestCase): 
self.assertRaises(ValueError, format, 0.0, '#') self.assertRaises(ValueError, format, 0.0, '#20f') + def test_format_spec_errors(self): + # int, float, and string all share the same format spec + # mini-language parser. + + # Check that we can't ask for too many digits. This is + # probably a CPython specific test. It tries to put the width + # into a C long. + self.assertRaises(ValueError, format, 0, '1'*10000 + 'd') + + # Similar with the precision. + self.assertRaises(ValueError, format, 0, '.' + '1'*10000 + 'd') + + # And may as well test both. + self.assertRaises(ValueError, format, 0, '1'*1000 + '.' + '1'*10000 + 'd') + + # Make sure commas aren't allowed with various type codes + for code in 'xXobns': + self.assertRaises(ValueError, format, 0, ',' + code) def test_main(): run_unittest(TypesTests) diff --git a/Misc/NEWS b/Misc/NEWS index f9c8ada..a471725 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,15 @@ What's New in Python 3.1 beta 1? Core and Builtins ----------------- +- Issue #5772: format(1e100, '<') produces '1e+100', not '1.0e+100'. + +- Issue #5515: str.format() presentation type 'n' with commas no + longer works poorly with leading zeros when formatting ints and + floats. + +- Implement PEP 378, Format Specifier for Thousands Separator, for + floats. + - The repr function switches to exponential notation at 1e16, not 1e17 as it did before. This change applies to both 'short' and legacy float repr styles. 
For the new repr style, it avoids misleading diff --git a/Modules/_pickle.c b/Modules/_pickle.c index bda8efd..20ab525 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1016,16 +1016,31 @@ save_float(PicklerObject *self, PyObject *obj) return -1; if (pickler_write(self, pdata, 9) < 0) return -1; - } + } else { - char pdata[250]; - pdata[0] = FLOAT; - PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x); - /* Extend the formatted string with a newline character */ - strcat(pdata, "\n"); + int result = -1; + char *buf = NULL; + char op = FLOAT; - if (pickler_write(self, pdata, strlen(pdata)) < 0) - return -1; + if (pickler_write(self, &op, 1) < 0) + goto done; + + buf = PyOS_double_to_string(x, 'r', 0, 0, NULL); + if (!buf) { + PyErr_NoMemory(); + goto done; + } + + if (pickler_write(self, buf, strlen(buf)) < 0) + goto done; + + if (pickler_write(self, "\n", 1) < 0) + goto done; + + result = 0; +done: + PyMem_Free(buf); + return result; } return 0; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 1239680..f306cb8 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -562,6 +562,7 @@ PyBytes_AsStringAndSize(register PyObject *obj, /* -------------------------------------------------------------------- */ /* Methods */ +#include "stringlib/stringdefs.h" #define STRINGLIB_CHAR char #define STRINGLIB_CMP memcmp diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 207ecdd0..721db8f 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -14,22 +14,6 @@ #ifndef WITHOUT_COMPLEX -/* Precisions used by repr() and str(), respectively. - - The repr() precision (17 significant decimal digits) is the minimal number - that is guaranteed to have enough precision so that if the number is read - back in the exact same binary value is recreated. This is true for IEEE - floating point by design, and also happens to work for all other modern - hardware. 
- - The str() precision is chosen so that in most cases, the rounding noise - created by various operations is suppressed, while giving plenty of - precision for practical use. -*/ - -#define PREC_REPR 17 -#define PREC_STR 12 - /* elementary operations on complex numbers */ static Py_complex c_1 = {1., 0.}; @@ -345,71 +329,114 @@ complex_dealloc(PyObject *op) } -static void -complex_to_buf(char *buf, int bufsz, PyComplexObject *v, int precision) +static PyObject * +complex_format(PyComplexObject *v, char format_code) { - char format[32]; - if (v->cval.real == 0.) { - if (!Py_IS_FINITE(v->cval.imag)) { - if (Py_IS_NAN(v->cval.imag)) - strncpy(buf, "nan*j", 6); - else if (copysign(1, v->cval.imag) == 1) - strncpy(buf, "inf*j", 6); - else - strncpy(buf, "-inf*j", 7); - } - else { - PyOS_snprintf(format, sizeof(format), "%%.%ig", precision); - PyOS_ascii_formatd(buf, bufsz - 1, format, v->cval.imag); - strncat(buf, "j", 1); - } - } else { - char re[64], im[64]; - /* Format imaginary part with sign, real part without */ - if (!Py_IS_FINITE(v->cval.real)) { - if (Py_IS_NAN(v->cval.real)) - strncpy(re, "nan", 4); - /* else if (copysign(1, v->cval.real) == 1) */ - else if (v->cval.real > 0) - strncpy(re, "inf", 4); - else - strncpy(re, "-inf", 5); - } - else { - PyOS_snprintf(format, sizeof(format), "%%.%ig", precision); - PyOS_ascii_formatd(re, sizeof(re), format, v->cval.real); - } - if (!Py_IS_FINITE(v->cval.imag)) { - if (Py_IS_NAN(v->cval.imag)) - strncpy(im, "+nan*", 6); - /* else if (copysign(1, v->cval.imag) == 1) */ - else if (v->cval.imag > 0) - strncpy(im, "+inf*", 6); - else - strncpy(im, "-inf*", 6); - } - else { - PyOS_snprintf(format, sizeof(format), "%%+.%ig", precision); - PyOS_ascii_formatd(im, sizeof(im), format, v->cval.imag); - } - PyOS_snprintf(buf, bufsz, "(%s%sj)", re, im); - } + PyObject *result = NULL; + Py_ssize_t len; + + /* If these are non-NULL, they'll need to be freed. 
*/ + char *pre = NULL; + char *pim = NULL; + char *buf = NULL; + + /* These do not need to be freed. They're either aliases for pim + and pre, or pointers to constants. */ + char *re = NULL; + char *im = NULL; + char *lead = ""; + char *tail = ""; + + + if (v->cval.real == 0.) { + re = ""; + if (!Py_IS_FINITE(v->cval.imag)) { + if (Py_IS_NAN(v->cval.imag)) + im = "nan*"; + else if (copysign(1, v->cval.imag) == 1) + im = "inf*"; + else + im = "-inf*"; + } + else { + pim = PyOS_double_to_string(v->cval.imag, format_code, + 0, 0, NULL); + if (!pim) { + PyErr_NoMemory(); + goto done; + } + im = pim; + } + } else { + /* Format imaginary part with sign, real part without */ + if (!Py_IS_FINITE(v->cval.real)) { + if (Py_IS_NAN(v->cval.real)) + re = "nan"; + /* else if (copysign(1, v->cval.real) == 1) */ + else if (v->cval.real > 0) + re = "inf"; + else + re = "-inf"; + } + else { + pre = PyOS_double_to_string(v->cval.real, format_code, + 0, 0, NULL); + if (!pre) { + PyErr_NoMemory(); + goto done; + } + re = pre; + } + + if (!Py_IS_FINITE(v->cval.imag)) { + if (Py_IS_NAN(v->cval.imag)) + im = "+nan*"; + /* else if (copysign(1, v->cval.imag) == 1) */ + else if (v->cval.imag > 0) + im = "+inf*"; + else + im = "-inf*"; + } + else { + pim = PyOS_double_to_string(v->cval.imag, format_code, + 0, Py_DTSF_SIGN, NULL); + if (!pim) { + PyErr_NoMemory(); + goto done; + } + im = pim; + } + lead = "("; + tail = ")"; + } + /* Alloc the final buffer. Add one for the "j" in the format string, and + one for the trailing zero. 
*/ + len = strlen(lead) + strlen(re) + strlen(im) + strlen(tail) + 2; + buf = PyMem_Malloc(len); + if (!buf) { + PyErr_NoMemory(); + goto done; + } + PyOS_snprintf(buf, len, "%s%s%sj%s", lead, re, im, tail); + result = PyUnicode_FromString(buf); +done: + PyMem_Free(pim); + PyMem_Free(pre); + PyMem_Free(buf); + + return result; } static PyObject * complex_repr(PyComplexObject *v) { - char buf[100]; - complex_to_buf(buf, sizeof(buf), v, PREC_REPR); - return PyUnicode_FromString(buf); + return complex_format(v, 'r'); } static PyObject * complex_str(PyComplexObject *v) { - char buf[100]; - complex_to_buf(buf, sizeof(buf), v, PREC_STR); - return PyUnicode_FromString(buf); + return complex_format(v, 's'); } static long diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 2ef4d1a..2fbe810 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -197,8 +197,7 @@ PyFloat_FromString(PyObject *v) sp = s; /* We don't care about overflow or underflow. If the platform supports * them, infinities and signed zeroes (on underflow) are fine. - * However, strtod can return 0 for denormalized numbers, where atof - * does not. So (alas!) we special-case a zero result. Note that + * However, strtod can return 0 for denormalized numbers. Note that * whether strtod sets errno on underflow is not defined, so we can't * key off errno. */ @@ -259,14 +258,6 @@ PyFloat_FromString(PyObject *v) "null byte in argument for float()"); goto error; } - if (x == 0.0) { - /* See above -- may have been strtod being anal - about denorms. 
*/ - PyFPE_START_PROTECT("atof", goto error) - x = PyOS_ascii_atof(s); - PyFPE_END_PROTECT(x) - errno = 0; /* whether atof ever set errno is undefined */ - } result = PyFloat_FromDouble(x); error: if (s_buffer) @@ -320,72 +311,6 @@ PyFloat_AsDouble(PyObject *op) return val; } -/* Methods */ - -static void -format_double(char *buf, size_t buflen, double ob_fval, int precision) -{ - register char *cp; - char format[32]; - int i; - - /* Subroutine for float_repr, float_str and float_print. - We want float numbers to be recognizable as such, - i.e., they should contain a decimal point or an exponent. - However, %g may print the number as an integer; - in such cases, we append ".0" to the string. */ - - PyOS_snprintf(format, 32, "%%.%ig", precision); - PyOS_ascii_formatd(buf, buflen, format, ob_fval); - cp = buf; - if (*cp == '-') - cp++; - for (; *cp != '\0'; cp++) { - /* Any non-digit means it's not an integer; - this takes care of NAN and INF as well. */ - if (!isdigit(Py_CHARMASK(*cp))) - break; - } - if (*cp == '\0') { - *cp++ = '.'; - *cp++ = '0'; - *cp++ = '\0'; - return; - } - /* Checking the next three chars should be more than enough to - * detect inf or nan, even on Windows. We check for inf or nan - * at last because they are rare cases. - */ - for (i=0; *cp != '\0' && i<3; cp++, i++) { - if (isdigit(Py_CHARMASK(*cp)) || *cp == '.') - continue; - /* found something that is neither a digit nor point - * it might be a NaN or INF - */ -#ifdef Py_NAN - if (Py_IS_NAN(ob_fval)) { - strcpy(buf, "nan"); - } - else -#endif - if (Py_IS_INFINITY(ob_fval)) { - cp = buf; - if (*cp == '-') - cp++; - strcpy(cp, "inf"); - } - break; - } - -} - -static void -format_float(char *buf, size_t buflen, PyFloatObject *v, int precision) -{ - assert(PyFloat_Check(v)); - format_double(buf, buflen, PyFloat_AS_DOUBLE(v), precision); -} - /* Macro and helper that convert PyObject obj to a C double and store the value in dbl. 
If conversion to double raises an exception, obj is set to NULL, and the function invoking this macro returns NULL. If @@ -398,6 +323,8 @@ format_float(char *buf, size_t buflen, PyFloatObject *v, int precision) else if (convert_to_double(&(obj), &(dbl)) < 0) \ return obj; +/* Methods */ + static int convert_to_double(PyObject **v, double *dbl) { @@ -418,38 +345,30 @@ convert_to_double(PyObject **v, double *dbl) return 0; } -/* Precisions used by repr() and str(), respectively. - - The repr() precision (17 significant decimal digits) is the minimal number - that is guaranteed to have enough precision so that if the number is read - back in the exact same binary value is recreated. This is true for IEEE - floating point by design, and also happens to work for all other modern - hardware. - - The str() precision is chosen so that in most cases, the rounding noise - created by various operations is suppressed, while giving plenty of - precision for practical use. - -*/ - -#define PREC_REPR 17 -#define PREC_STR 12 +static PyObject * +float_str_or_repr(PyFloatObject *v, char format_code) +{ + PyObject *result; + char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), + format_code, 0, Py_DTSF_ADD_DOT_0, + NULL); + if (!buf) + return PyErr_NoMemory(); + result = PyUnicode_FromString(buf); + PyMem_Free(buf); + return result; +} static PyObject * float_repr(PyFloatObject *v) { - char buf[100]; - format_float(buf, sizeof(buf), v, PREC_REPR); - - return PyUnicode_FromString(buf); + return float_str_or_repr(v, 'r'); } static PyObject * float_str(PyFloatObject *v) { - char buf[100]; - format_float(buf, sizeof(buf), v, PREC_STR); - return PyUnicode_FromString(buf); + return float_str_or_repr(v, 's'); } /* Comparison is pretty much a nightmare. 
When comparing float to float, @@ -1980,15 +1899,21 @@ PyFloat_Fini(void) i++, p++) { if (PyFloat_CheckExact(p) && Py_REFCNT(p) != 0) { - char buf[100]; - format_float(buf, sizeof(buf), p, PREC_STR); - /* XXX(twouters) cast refcount to - long until %zd is universally - available - */ - fprintf(stderr, + char *buf = PyOS_double_to_string( + PyFloat_AS_DOUBLE(p), 'r', + 0, 0, NULL); + if (buf) { + /* XXX(twouters) cast + refcount to long + until %zd is + universally + available + */ + fprintf(stderr, "# \n", p, (long)Py_REFCNT(p), buf); + PyMem_Free(buf); + } } } list = list->next; @@ -2233,14 +2158,6 @@ _PyFloat_Pack8(double x, unsigned char *p, int le) } } -/* Should only be used by marshal. */ -int -_PyFloat_Repr(double x, char *p, size_t len) -{ - format_double(p, len, x, PREC_REPR); - return (int)strlen(p); -} - double _PyFloat_Unpack4(const unsigned char *p, int le) { diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h index d936a67..61ca12b 100644 --- a/Objects/stringlib/formatter.h +++ b/Objects/stringlib/formatter.h @@ -1,6 +1,8 @@ /* implements the string, long, and float formatters. that is, string.__format__, etc. */ +#include + /* Before including this, you must include either: stringlib/unicodedefs.h stringlib/stringdefs.h @@ -13,8 +15,6 @@ be. These are the only non-static functions defined here. */ -#define ALLOW_PARENS_FOR_SIGN 0 - /* Raises an exception about an unknown presentation type for this * type. 
*/ @@ -104,9 +104,6 @@ is_sign_element(STRINGLIB_CHAR c) { switch (c) { case ' ': case '+': case '-': -#if ALLOW_PARENS_FOR_SIGN - case '(': -#endif return 1; default: return 0; @@ -143,7 +140,7 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, /* end-ptr is used throughout this code to specify the length of the input string */ - Py_ssize_t specified_width; + Py_ssize_t consumed; format->fill_char = '\0'; format->align = '\0'; @@ -170,11 +167,6 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, if (end-ptr >= 1 && is_sign_element(ptr[0])) { format->sign = ptr[0]; ++ptr; -#if ALLOW_PARENS_FOR_SIGN - if (end-ptr >= 1 && ptr[0] == ')') { - ++ptr; - } -#endif } /* If the next character is #, we're in alternate mode. This only @@ -193,15 +185,17 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, ++ptr; } - /* XXX add error checking */ - specified_width = get_integer(&ptr, end, &format->width); + consumed = get_integer(&ptr, end, &format->width); + if (consumed == -1) + /* Overflow error. Exception already set. */ + return 0; - /* if specified_width is 0, we didn't consume any characters for - the width. in that case, reset the width to -1, because - get_integer() will have set it to zero */ - if (specified_width == 0) { + /* If consumed is 0, we didn't consume any characters for the + width. In that case, reset the width to -1, because + get_integer() will have set it to zero. -1 is how we record + that the width wasn't specified. */ + if (consumed == 0) format->width = -1; - } /* Comma signifies add thousands separators */ if (end-ptr && ptr[0] == ',') { @@ -213,11 +207,13 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, if (end-ptr && ptr[0] == '.') { ++ptr; - /* XXX add error checking */ - specified_width = get_integer(&ptr, end, &format->precision); + consumed = get_integer(&ptr, end, &format->precision); + if (consumed == -1) + /* Overflow error. Exception already set. 
*/ + return 0; - /* not having a precision after a dot is an error */ - if (specified_width == 0) { + /* Not having a precision after a dot is an error. */ + if (consumed == 0) { PyErr_Format(PyExc_ValueError, "Format specifier missing precision"); return 0; @@ -225,10 +221,10 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, } - /* Finally, parse the type field */ + /* Finally, parse the type field. */ if (end-ptr > 1) { - /* invalid conversion spec */ + /* More than one char remain, invalid conversion spec. */ PyErr_Format(PyExc_ValueError, "Invalid conversion specification"); return 0; } @@ -238,9 +234,27 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, ++ptr; } - if (format->type == 'n' && format->thousands_separators) { - PyErr_Format(PyExc_ValueError, "Cannot specify ',' with 'n'."); - return 0; + /* Do as much validating as we can, just by looking at the format + specifier. Do not take into account what type of formatting + we're doing (int, float, string). */ + + if (format->thousands_separators) { + switch (format->type) { + case 'd': + case 'e': + case 'f': + case 'g': + case 'E': + case 'G': + case '%': + case 'F': + /* These are allowed. See PEP 378.*/ + break; + default: + PyErr_Format(PyExc_ValueError, + "Cannot specify ',' with '%c'.", format->type); + return 0; + } } return 1; @@ -251,6 +265,20 @@ parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, /*********** common routines for numeric formatting *********************/ /************************************************************************/ +/* Locale type codes. */ +#define LT_CURRENT_LOCALE 0 +#define LT_DEFAULT_LOCALE 1 +#define LT_NO_LOCALE 2 + +/* Locale info needed for formatting integers and the part of floats + before and including the decimal. Note that locales only support + 8-bit chars, not unicode. 
*/ +typedef struct { + char *decimal_point; + char *thousands_sep; + char *grouping; +} LocaleInfo; + /* describes the layout for an integer, see the comment in calc_number_widths() for details */ typedef struct { @@ -258,38 +286,84 @@ typedef struct { Py_ssize_t n_prefix; Py_ssize_t n_spadding; Py_ssize_t n_rpadding; - char lsign; - Py_ssize_t n_lsign; - char rsign; - Py_ssize_t n_rsign; - Py_ssize_t n_total; /* just a convenience, it's derivable from the - other fields */ + char sign; + Py_ssize_t n_sign; /* number of digits needed for sign (0/1) */ + Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including + any grouping chars. */ + Py_ssize_t n_decimal; /* 0 if only an integer */ + Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part, + excluding the decimal itself, if + present. */ + + /* These 2 are not the widths of fields, but are needed by + STRINGLIB_GROUPING. */ + Py_ssize_t n_digits; /* The number of digits before a decimal + or exponent. */ + Py_ssize_t n_min_width; /* The min_width we used when we computed + the n_grouped_digits width. */ } NumberFieldWidths; +/* Given a number of the form: + digits[remainder] + where ptr points to the start and end points to the end, find where + the integer part ends. This could be a decimal, an exponent, both, + or neither. + If a decimal point is present, set *has_decimal and increment + remainder beyond it. + Results are undefined (but shouldn't crash) for improperly + formatted strings. +*/ +static void +parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len, + Py_ssize_t *n_remainder, int *has_decimal) +{ + STRINGLIB_CHAR *end = ptr + len; + STRINGLIB_CHAR *remainder; + + while (ptrn_digits = n_number - n_remainder - (has_decimal?1:0); spec->n_lpadding = 0; - spec->n_prefix = 0; + spec->n_prefix = n_prefix; + spec->n_decimal = has_decimal ? 
strlen(locale->decimal_point) : 0; + spec->n_remainder = n_remainder; spec->n_spadding = 0; spec->n_rpadding = 0; - spec->lsign = '\0'; - spec->n_lsign = 0; - spec->rsign = '\0'; - spec->n_rsign = 0; + spec->sign = '\0'; + spec->n_sign = 0; /* the output will look like: - | | - | | - | | + | | + | | + | | - lsign and rsign are computed from format->sign and the actual + sign is computed from format->sign and the actual sign of the number prefix is given (it's for the '0x' prefix) @@ -304,108 +378,191 @@ calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign, */ /* compute the various parts we're going to write */ - if (format->sign == '+') { + switch (format->sign) { + case '+': /* always put a + or - */ - spec->n_lsign = 1; - spec->lsign = (actual_sign == '-' ? '-' : '+'); - } -#if ALLOW_PARENS_FOR_SIGN - else if (format->sign == '(') { - if (actual_sign == '-') { - spec->n_lsign = 1; - spec->lsign = '('; - spec->n_rsign = 1; - spec->rsign = ')'; - } - } -#endif - else if (format->sign == ' ') { - spec->n_lsign = 1; - spec->lsign = (actual_sign == '-' ? '-' : ' '); - } - else { - /* non specified, or the default (-) */ - if (actual_sign == '-') { - spec->n_lsign = 1; - spec->lsign = '-'; + spec->n_sign = 1; + spec->sign = (sign_char == '-' ? '-' : '+'); + break; + case ' ': + spec->n_sign = 1; + spec->sign = (sign_char == '-' ? '-' : ' '); + break; + default: + /* Not specified, or the default (-) */ + if (sign_char == '-') { + spec->n_sign = 1; + spec->sign = '-'; } } - spec->n_prefix = n_prefix; + /* The number of chars used for non-digits and non-padding. 
*/ + n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal + + spec->n_remainder; - /* now the number of padding characters */ - if (format->width == -1) { - /* no padding at all, nothing to do */ - } - else { - /* see if any padding is needed */ - if (spec->n_lsign + n_digits + spec->n_rsign + - spec->n_prefix >= format->width) { - /* no padding needed, we're already bigger than the - requested width */ - } - else { - /* determine which of left, space, or right padding is - needed */ - Py_ssize_t padding = format->width - - (spec->n_lsign + spec->n_prefix + - n_digits + spec->n_rsign); - if (format->align == '<') - spec->n_rpadding = padding; - else if (format->align == '>') - spec->n_lpadding = padding; - else if (format->align == '^') { - spec->n_lpadding = padding / 2; - spec->n_rpadding = padding - spec->n_lpadding; - } - else if (format->align == '=') - spec->n_spadding = padding; - else - spec->n_lpadding = padding; + /* min_width can go negative, that's okay. format->width == -1 means + we don't care. */ + if (format->fill_char == '0') + spec->n_min_width = format->width - n_non_digit_non_padding; + else + spec->n_min_width = 0; + + if (spec->n_digits == 0) + /* This case only occurs when using 'c' formatting, we need + to special case it because the grouping code always wants + to have at least one character. */ + spec->n_grouped_digits = 0; + else + spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL, + spec->n_digits, + spec->n_min_width, + locale->grouping, + locale->thousands_sep); + + /* Given the desired width and the total of digit and non-digit + space we consume, see if we need any padding. format->width can + be negative (meaning no padding), but this code still works in + that case. */ + n_padding = format->width - + (n_non_digit_non_padding + spec->n_grouped_digits); + if (n_padding > 0) { + /* Some padding is needed. Determine if it's left, space, or right. 
*/ + switch (format->align) { + case '<': + spec->n_rpadding = n_padding; + break; + case '^': + spec->n_lpadding = n_padding / 2; + spec->n_rpadding = n_padding - spec->n_lpadding; + break; + case '=': + spec->n_spadding = n_padding; + break; + default: + /* Handles '>', plus catch-all just in case. */ + spec->n_lpadding = n_padding; + break; } } - spec->n_total = spec->n_lpadding + spec->n_lsign + spec->n_prefix + - spec->n_spadding + n_digits + spec->n_rsign + spec->n_rpadding; + return spec->n_lpadding + spec->n_sign + spec->n_prefix + + spec->n_spadding + spec->n_grouped_digits + spec->n_decimal + + spec->n_remainder + spec->n_rpadding; } -/* fill in the non-digit parts of a numbers's string representation, - as determined in calc_number_widths(). returns the pointer to - where the digits go. */ -static STRINGLIB_CHAR * -fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec, - STRINGLIB_CHAR *prefix, Py_ssize_t n_digits, - STRINGLIB_CHAR fill_char) +/* Fill in the digit parts of a numbers's string representation, + as determined in calc_number_widths(). + No error checking, since we know the buffer is the correct size. */ +static void +fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec, + STRINGLIB_CHAR *digits, Py_ssize_t n_digits, + STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char, + LocaleInfo *locale, int toupper) { - STRINGLIB_CHAR *p_digits; + /* Used to keep track of digits, decimal, and remainder. 
*/ + STRINGLIB_CHAR *p = digits; + +#ifndef NDEBUG + Py_ssize_t r; +#endif if (spec->n_lpadding) { - STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding); - p_buf += spec->n_lpadding; + STRINGLIB_FILL(buf, fill_char, spec->n_lpadding); + buf += spec->n_lpadding; } - if (spec->n_lsign == 1) { - *p_buf++ = spec->lsign; + if (spec->n_sign == 1) { + *buf++ = spec->sign; } if (spec->n_prefix) { - memmove(p_buf, + memmove(buf, prefix, spec->n_prefix * sizeof(STRINGLIB_CHAR)); - p_buf += spec->n_prefix; + if (toupper) { + Py_ssize_t t; + for (t = 0; t < spec->n_prefix; ++t) + buf[t] = STRINGLIB_TOUPPER(buf[t]); + } + buf += spec->n_prefix; } if (spec->n_spadding) { - STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding); - p_buf += spec->n_spadding; + STRINGLIB_FILL(buf, fill_char, spec->n_spadding); + buf += spec->n_spadding; } - p_digits = p_buf; - p_buf += n_digits; - if (spec->n_rsign == 1) { - *p_buf++ = spec->rsign; + + /* Only for type 'c' special case, it has no digits. */ + if (spec->n_digits != 0) { + /* Fill the digits with InsertThousandsGrouping. 
*/ +#ifndef NDEBUG + r = +#endif + STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits, + spec->n_digits, spec->n_min_width, + locale->grouping, locale->thousands_sep); +#ifndef NDEBUG + assert(r == spec->n_grouped_digits); +#endif + p += spec->n_digits; + } + if (toupper) { + Py_ssize_t t; + for (t = 0; t < spec->n_grouped_digits; ++t) + buf[t] = STRINGLIB_TOUPPER(buf[t]); + } + buf += spec->n_grouped_digits; + + if (spec->n_decimal) { + Py_ssize_t t; + for (t = 0; t < spec->n_decimal; ++t) + buf[t] = locale->decimal_point[t]; + buf += spec->n_decimal; + p += 1; + } + + if (spec->n_remainder) { + memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR)); + buf += spec->n_remainder; + p += spec->n_remainder; } + if (spec->n_rpadding) { - STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding); - p_buf += spec->n_rpadding; + STRINGLIB_FILL(buf, fill_char, spec->n_rpadding); + buf += spec->n_rpadding; + } +} + +static char no_grouping[1] = {CHAR_MAX}; + +/* Find the decimal point character(s?), thousands_separator(s?), and + grouping description, either for the current locale if type is + LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or + none if LT_NO_LOCALE. */ +static void +get_locale_info(int type, LocaleInfo *locale_info) +{ + switch (type) { + case LT_CURRENT_LOCALE: { + struct lconv *locale_data = localeconv(); + locale_info->decimal_point = locale_data->decimal_point; + locale_info->thousands_sep = locale_data->thousands_sep; + locale_info->grouping = locale_data->grouping; + break; + } + case LT_DEFAULT_LOCALE: + locale_info->decimal_point = "."; + locale_info->thousands_sep = ","; + locale_info->grouping = "\3"; /* Group every 3 characters, + trailing 0 means repeat + infinitely. 
*/ + break; + case LT_NO_LOCALE: + locale_info->decimal_point = "."; + locale_info->thousands_sep = ""; + locale_info->grouping = no_grouping; + break; + default: + assert(0); } - return p_digits; } + #endif /* FORMAT_FLOAT || FORMAT_LONG */ /************************************************************************/ @@ -523,19 +680,21 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, PyObject *tmp = NULL; STRINGLIB_CHAR *pnumeric_chars; STRINGLIB_CHAR numeric_char; - STRINGLIB_CHAR sign = '\0'; - STRINGLIB_CHAR *p; + STRINGLIB_CHAR sign_char = '\0'; Py_ssize_t n_digits; /* count of digits need from the computed string */ - Py_ssize_t n_leading_chars; - Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to - allocate, used for 'n' - formatting. */ + Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which + produces non-digits */ Py_ssize_t n_prefix = 0; /* Count of prefix chars, (e.g., '0x') */ + Py_ssize_t n_total; STRINGLIB_CHAR *prefix = NULL; NumberFieldWidths spec; long x; + /* Locale settings, either from the actual locale or + from a hard-code pseudo-locale */ + LocaleInfo locale; + /* no precision allowed on integers */ if (format->precision != -1) { PyErr_SetString(PyExc_ValueError, @@ -543,7 +702,6 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, goto done; } - /* special case for character formatting */ if (format->type == 'c') { /* error to specify a sign */ @@ -554,6 +712,14 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, goto done; } + /* Error to specify a comma. 
*/ + if (format->thousands_separators) { + PyErr_SetString(PyExc_ValueError, + "Thousands separators not allowed with integer" + " format specifier 'c'"); + goto done; + } + /* taken from unicodeobject.c formatchar() */ /* Integer input truncated to a character */ /* XXX: won't work for int */ @@ -578,6 +744,13 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, numeric_char = (STRINGLIB_CHAR)x; pnumeric_chars = &numeric_char; n_digits = 1; + + /* As a sort-of hack, we tell calc_number_widths that we only + have "remainder" characters. calc_number_widths thinks + these are characters that don't get formatted, only copied + into the output string. We do this for 'c' formatting, + because the characters are likely to be non-digits. */ + n_remainder = 1; } else { int base; @@ -629,8 +802,8 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, /* Is a sign character present in the output? If so, remember it and skip it */ - sign = pnumeric_chars[0]; - if (sign == '-') { + if (pnumeric_chars[0] == '-') { + sign_char = pnumeric_chars[0]; ++prefix; ++leading_chars_to_skip; } @@ -640,86 +813,26 @@ format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, pnumeric_chars += leading_chars_to_skip; } - if (format->type == 'n') - /* Compute how many additional chars we need to allocate - to hold the thousands grouping. */ - STRINGLIB_GROUPING_LOCALE(NULL, n_digits, n_digits, - 0, &n_grouping_chars, 0); - if (format->thousands_separators) - /* Compute how many additional chars we need to allocate - to hold the thousands grouping. 
*/ - STRINGLIB_GROUPING(NULL, n_digits, n_digits, - 0, &n_grouping_chars, 0, "\3", ","); - - /* Calculate the widths of the various leading and trailing parts */ - calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars, - format); - - /* Allocate a new string to hold the result */ - result = STRINGLIB_NEW(NULL, spec.n_total); - if (!result) - goto done; - p = STRINGLIB_STR(result); - - /* XXX There is too much magic here regarding the internals of - spec and the location of the prefix and digits. It would be - better if calc_number_widths returned a number of logical - offsets into the buffer, and those were used. Maybe in a - future code cleanup. */ - - /* Fill in the digit parts */ - n_leading_chars = spec.n_lpadding + spec.n_lsign + - spec.n_prefix + spec.n_spadding; - memmove(p + n_leading_chars, - pnumeric_chars, - n_digits * sizeof(STRINGLIB_CHAR)); - - /* If type is 'X', convert the filled in digits to uppercase */ - if (format->type == 'X') { - Py_ssize_t t; - for (t = 0; t < n_digits; ++t) - p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]); - } + /* Determine the grouping, separator, and decimal point, if any. */ + get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale); - /* Insert the grouping, if any, after the uppercasing of the digits, so - we can ensure that grouping chars won't be affected. */ - if (n_grouping_chars) { - /* We know this can't fail, since we've already - reserved enough space. 
*/ - STRINGLIB_CHAR *pstart = p + n_leading_chars; -#ifndef NDEBUG - int r; -#endif - if (format->type == 'n') -#ifndef NDEBUG - r = -#endif - STRINGLIB_GROUPING_LOCALE(pstart, n_digits, n_digits, - spec.n_total+n_grouping_chars-n_leading_chars, - NULL, 0); - else -#ifndef NDEBUG - r = -#endif - STRINGLIB_GROUPING(pstart, n_digits, n_digits, - spec.n_total+n_grouping_chars-n_leading_chars, - NULL, 0, "\3", ","); - assert(r); - } + /* Calculate how much memory we'll need. */ + n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars, + n_digits, n_remainder, 0, &locale, format); - /* Fill in the non-digit parts (padding, sign, etc.) */ - fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars, - format->fill_char == '\0' ? ' ' : format->fill_char); - - /* If type is 'X', uppercase the prefix. This has to be done after the - prefix is filled in by fill_non_digits */ - if (format->type == 'X') { - Py_ssize_t t; - for (t = 0; t < n_prefix; ++t) - p[t + spec.n_lpadding + spec.n_lsign] = - STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]); - } + /* Allocate the memory. */ + result = STRINGLIB_NEW(NULL, n_total); + if (!result) + goto done; + /* Populate the memory. */ + fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits, + prefix, format->fill_char == '\0' ? 
' ' : format->fill_char, + &locale, format->type == 'X'); done: Py_XDECREF(tmp); @@ -733,64 +846,45 @@ done: #ifdef FORMAT_FLOAT #if STRINGLIB_IS_UNICODE -/* taken from unicodeobject.c */ -static Py_ssize_t -strtounicode(Py_UNICODE *buffer, const char *charbuffer) +static void +strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len) { - register Py_ssize_t i; - Py_ssize_t len = strlen(charbuffer); - for (i = len - 1; i >= 0; --i) - buffer[i] = (Py_UNICODE) charbuffer[i]; - - return len; + Py_ssize_t i; + for (i = 0; i < len; ++i) + buffer[i] = (Py_UNICODE)charbuffer[i]; } #endif -/* see FORMATBUFLEN in unicodeobject.c */ -#define FLOAT_FORMATBUFLEN 120 - /* much of this is taken from unicodeobject.c */ static PyObject * format_float_internal(PyObject *value, const InternalFormatSpec *format) { - /* fmt = '%.' + `prec` + `type` + '%%' - worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/ - char fmt[20]; - - /* taken from unicodeobject.c */ - /* Worst case length calc to ensure no buffer overrun: - - 'g' formats: - fmt = %#.g - buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp - for any double rep.) - len = 1 + prec + 1 + 2 + 5 = 9 + prec - - 'f' formats: - buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) - len = 1 + 50 + 1 + prec = 52 + prec - - If prec=0 the effective precision is 1 (the leading digit is - always given), therefore increase the length by one. 
- - */ - char charbuf[FLOAT_FORMATBUFLEN]; + char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; - double x; + Py_ssize_t n_remainder; + Py_ssize_t n_total; + int has_decimal; + double val; Py_ssize_t precision = format->precision; - PyObject *result = NULL; - STRINGLIB_CHAR sign; - char* trailing = ""; + STRINGLIB_CHAR type = format->type; + int add_pct = 0; STRINGLIB_CHAR *p; NumberFieldWidths spec; - STRINGLIB_CHAR type = format->type; + int flags = 0; + PyObject *result = NULL; + STRINGLIB_CHAR sign_char = '\0'; + int float_type; /* Used to see if we have a nan, inf, or regular float. */ #if STRINGLIB_IS_UNICODE - Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN]; + Py_UNICODE *unicode_tmp = NULL; #endif - /* alternate is not allowed on floats. */ + /* Locale settings, either from the actual locale or + from a hard-code pseudo-locale */ + LocaleInfo locale; + + /* Alternate is not allowed on floats. */ if (format->alternate) { PyErr_SetString(PyExc_ValueError, "Alternate form (#) not allowed in float format " @@ -798,84 +892,106 @@ format_float_internal(PyObject *value, goto done; } - /* first, do the conversion as 8-bit chars, using the platform's - snprintf. then, if needed, convert to unicode. */ + if (type == '\0') { + /* Omitted type specifier. This is like 'g' but with at least + one digit after the decimal point. */ + type = 'g'; + flags |= Py_DTSF_ADD_DOT_0; + } + + if (type == 'n') + /* 'n' is the same as 'g', except for the locale used to + format the result. We take care of that later. 
*/ + type = 'g'; /* 'F' is the same as 'f', per the PEP */ if (type == 'F') type = 'f'; - x = PyFloat_AsDouble(value); - - if (x == -1.0 && PyErr_Occurred()) + val = PyFloat_AsDouble(value); + if (val == -1.0 && PyErr_Occurred()) goto done; if (type == '%') { type = 'f'; - x *= 100; - trailing = "%"; + val *= 100; + add_pct = 1; } if (precision < 0) precision = 6; - if (type == 'f' && fabs(x) >= 1e50) + if ((type == 'f' || type == 'F') && fabs(val) >= 1e50) type = 'g'; - /* cast "type", because if we're in unicode we need to pass a - 8-bit char. this is safe, because we've restricted what "type" - can be */ - PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision, - (char)type); - - /* do the actual formatting */ - PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x); - - /* adding trailing to fmt with PyOS_snprintf doesn't work, not - sure why. we'll just concatentate it here, no harm done. we - know we can't have a buffer overflow from the fmt size - analysis */ - strcat(charbuf, trailing); - - /* rather than duplicate the code for snprintf for both unicode - and 8 bit strings, we just use the 8 bit version and then - convert to unicode in a separate code path. that's probably - the lesser of 2 evils. */ + /* Cast "type", because if we're in unicode we need to pass a + 8-bit char. This is safe, because we've restricted what "type" + can be. */ + buf = PyOS_double_to_string(val, (char)type, precision, flags, + &float_type); + if (buf == NULL) + goto done; + n_digits = strlen(buf); + + if (add_pct) { + /* We know that buf has a trailing zero (since we just called + strlen() on it), and we don't use that fact any more. So we + can just write over the trailing zero. */ + buf[n_digits] = '%'; + n_digits += 1; + } + + /* Since there is no unicode version of PyOS_double_to_string, + just use the 8 bit version and then convert to unicode. 
*/ #if STRINGLIB_IS_UNICODE - n_digits = strtounicode(unicodebuf, charbuf); - p = unicodebuf; + unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE)); + if (unicode_tmp == NULL) { + PyErr_NoMemory(); + goto done; + } + strtounicode(unicode_tmp, buf, n_digits); + p = unicode_tmp; #else - /* compute the length. I believe this is done because the return - value from snprintf above is unreliable */ - n_digits = strlen(charbuf); - p = charbuf; + p = buf; #endif - /* is a sign character present in the output? if so, remember it + /* Is a sign character present in the output? If so, remember it and skip it */ - sign = p[0]; - if (sign == '-') { + if (*p == '-') { + sign_char = *p; ++p; --n_digits; } - calc_number_widths(&spec, sign, 0, n_digits, format); + /* Determine if we have any "remainder" (after the digits, might include + decimal or exponent or both (or neither)) */ + parse_number(p, n_digits, &n_remainder, &has_decimal); + + /* Determine the grouping, separator, and decimal point, if any. */ + get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE : + (format->thousands_separators ? + LT_DEFAULT_LOCALE : + LT_NO_LOCALE), + &locale); - /* allocate a string with enough space */ - result = STRINGLIB_NEW(NULL, spec.n_total); + /* Calculate how much memory we'll need. */ + n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits, + n_remainder, has_decimal, &locale, format); + + /* Allocate the memory. */ + result = STRINGLIB_NEW(NULL, n_total); if (result == NULL) goto done; - /* Fill in the non-digit parts (padding, sign, etc.) */ - fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits, - format->fill_char == '\0' ? ' ' : format->fill_char); - - /* fill in the digit parts */ - memmove(STRINGLIB_STR(result) + - (spec.n_lpadding + spec.n_lsign + spec.n_spadding), - p, - n_digits * sizeof(STRINGLIB_CHAR)); + /* Populate the memory. */ + fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL, + format->fill_char == '\0' ? 
' ' : format->fill_char, &locale, + 0); done: + PyMem_Free(buf); +#if STRINGLIB_IS_UNICODE + PyMem_Free(unicode_tmp); +#endif return result; } #endif /* FORMAT_FLOAT */ @@ -1056,11 +1172,7 @@ FORMAT_FLOAT(PyObject *obj, /* type conversion? */ switch (format.type) { - case '\0': - /* 'Z' means like 'g', but with at least one decimal. See - PyOS_ascii_formatd */ - format.type = 'Z'; - /* Deliberate fall through to the next case statement */ + case '\0': /* No format code: like 'g', but with at least one decimal. */ case 'e': case 'E': case 'f': diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h index 9254c09..f548133 100644 --- a/Objects/stringlib/localeutil.h +++ b/Objects/stringlib/localeutil.h @@ -5,161 +5,208 @@ #include +#define MAX(x, y) ((x) < (y) ? (y) : (x)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +typedef struct { + const char *grouping; + char previous; + Py_ssize_t i; /* Where we're currently pointing in grouping. */ +} GroupGenerator; + +static void +_GroupGenerator_init(GroupGenerator *self, const char *grouping) +{ + self->grouping = grouping; + self->i = 0; + self->previous = 0; +} + +/* Returns the next grouping, or 0 to signify end. */ +static Py_ssize_t +_GroupGenerator_next(GroupGenerator *self) +{ + /* Note that we don't really do much error checking here. If a + grouping string contains just CHAR_MAX, for example, then just + terminate the generator. That shouldn't happen, but at least we + fail gracefully. */ + switch (self->grouping[self->i]) { + case 0: + return self->previous; + case CHAR_MAX: + /* Stop the generator. */ + return 0; + default: { + char ch = self->grouping[self->i]; + self->previous = ch; + self->i++; + return (Py_ssize_t)ch; + } + } +} + +/* Fill in some digits, leading zeros, and thousands separator. All + are optional, depending on when we're called. 
*/ +static void +fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end, + Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep, + Py_ssize_t thousands_sep_len) +{ +#if STRINGLIB_IS_UNICODE + Py_ssize_t i; +#endif + + if (thousands_sep) { + *buffer_end -= thousands_sep_len; + + /* Copy the thousands_sep chars into the buffer. */ +#if STRINGLIB_IS_UNICODE + /* Convert from the char's of the thousands_sep from + the locale into unicode. */ + for (i = 0; i < thousands_sep_len; ++i) + (*buffer_end)[i] = thousands_sep[i]; +#else + /* No conversion, just memcpy the thousands_sep. */ + memcpy(*buffer_end, thousands_sep, thousands_sep_len); +#endif + } + + *buffer_end -= n_chars; + *digits_end -= n_chars; + memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR)); + + *buffer_end -= n_zeros; + STRINGLIB_FILL(*buffer_end, '0', n_zeros); +} + /** * _Py_InsertThousandsGrouping: * @buffer: A pointer to the start of a string. - * @n_buffer: The length of the string. + * @n_buffer: Number of characters in @buffer. + * @digits: A pointer to the digits we're reading from. If count + * is non-NULL, this is unused. * @n_digits: The number of digits in the string, in which we want * to put the grouping chars. - * @buf_size: The maximum size of the buffer pointed to by buffer. - * @count: If non-NULL, points to a variable that will receive the - * number of characters we need to insert (and no formatting - * will actually occur). - * @append_zero_char: If non-zero, put a trailing zero at the end of - * of the resulting string, if and only if we modified the - * string. + * @min_width: The minimum width of the digits in the output string. + * Output will be zero-padded on the left to fill. * @grouping: see definition in localeconv(). * @thousands_sep: see definition in localeconv(). * + * There are 2 modes: counting and filling. If @buffer is NULL, + * we are in counting mode, else filling mode. + * If counting, the required buffer size is returned. 
+ * If filling, we know the buffer will be large enough, so we don't + * need to pass in the buffer size. * Inserts thousand grouping characters (as defined by grouping and * thousands_sep) into the string between buffer and buffer+n_digits. - * If count is non-NULL, don't do any formatting, just count the - * number of characters to insert. This is used by the caller to - * appropriately resize the buffer, if needed. If count is non-NULL, - * buffer can be NULL (it is not dereferenced at all in that case). * * Return value: 0 on error, else 1. Note that no error can occur if * count is non-NULL. * * This name won't be used, the includer of this file should define * it to be the actual function name, based on unicode or string. + * + * As closely as possible, this code mimics the logic in decimal.py's + _insert_thousands_sep(). **/ -int +Py_ssize_t _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer, Py_ssize_t n_buffer, + STRINGLIB_CHAR *digits, Py_ssize_t n_digits, - Py_ssize_t buf_size, - Py_ssize_t *count, - int append_zero_char, + Py_ssize_t min_width, const char *grouping, const char *thousands_sep) { - Py_ssize_t thousands_sep_len = strlen(thousands_sep); - STRINGLIB_CHAR *pend = NULL; /* current end of buffer */ - STRINGLIB_CHAR *pmax = NULL; /* max of buffer */ - char current_grouping; - Py_ssize_t remaining = n_digits; /* Number of chars remaining to - be looked at */ - - /* Initialize the character count, if we're just counting. */ - if (count) - *count = 0; - else { - /* We're not just counting, we're modifying buffer */ - pend = buffer + n_buffer; - pmax = buffer + buf_size; + Py_ssize_t count = 0; + Py_ssize_t n_zeros; + int loop_broken = 0; + int use_separator = 0; /* First time through, don't append the + separator. They only go between + groups. 
*/ + STRINGLIB_CHAR *buffer_end = NULL; + STRINGLIB_CHAR *digits_end = NULL; + Py_ssize_t l; + Py_ssize_t n_chars; + Py_ssize_t thousands_sep_len = strlen(thousands_sep); + Py_ssize_t remaining = n_digits; /* Number of chars remaining to + be looked at */ + /* A generator that returns all of the grouping widths, until it + returns 0. */ + GroupGenerator groupgen; + _GroupGenerator_init(&groupgen, grouping); + + if (buffer) { + buffer_end = buffer + n_buffer; + digits_end = digits + n_digits; + } + + while ((l = _GroupGenerator_next(&groupgen)) > 0) { + l = MIN(l, MAX(MAX(remaining, min_width), 1)); + n_zeros = MAX(0, l - remaining); + n_chars = MAX(0, MIN(remaining, l)); + + /* Use n_zero zero's and n_chars chars */ + + /* Count only, don't do anything. */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + if (buffer) { + /* Copy into the output buffer. */ + fill(&digits_end, &buffer_end, n_chars, n_zeros, + use_separator ? thousands_sep : NULL, thousands_sep_len); } - /* Starting at the end and working right-to-left, keep track of - what grouping needs to be added and insert that. */ - current_grouping = *grouping++; - - /* If the first character is 0, perform no grouping at all. */ - if (current_grouping == 0) - return 1; - - while (remaining > current_grouping) { - /* Always leave buffer and pend valid at the end of this - loop, since we might leave with a return statement. */ - - remaining -= current_grouping; - if (count) { - /* We're only counting, not touching the memory. */ - *count += thousands_sep_len; - } - else { - /* Do the formatting. */ - - STRINGLIB_CHAR *plast = buffer + remaining; - - /* Is there room to insert thousands_sep_len chars? */ - if (pmax - pend < thousands_sep_len) - /* No room. */ - return 0; - - /* Move the rest of the string down. */ - memmove(plast + thousands_sep_len, - plast, - (pend - plast) * sizeof(STRINGLIB_CHAR)); - /* Copy the thousands_sep chars into the buffer. 
*/ -#if STRINGLIB_IS_UNICODE - /* Convert from the char's of the thousands_sep from - the locale into unicode. */ - { - Py_ssize_t i; - for (i = 0; i < thousands_sep_len; ++i) - plast[i] = thousands_sep[i]; - } -#else - /* No conversion, just memcpy the thousands_sep. */ - memcpy(plast, thousands_sep, thousands_sep_len); -#endif - } - - /* Adjust end pointer. */ - pend += thousands_sep_len; - - /* Move to the next grouping character, unless we're - repeating (which is designated by a grouping of 0). */ - if (*grouping != 0) { - current_grouping = *grouping++; - if (current_grouping == CHAR_MAX) - /* We're done. */ - break; - } + /* Use a separator next time. */ + use_separator = 1; + + remaining -= n_chars; + min_width -= l; + + if (remaining <= 0 && min_width <= 0) { + loop_broken = 1; + break; } - if (append_zero_char) { - /* Append a zero character to mark the end of the string, - if there's room. */ - if (pend - (buffer + remaining) < 1) - /* No room, error. */ - return 0; - *pend = 0; + min_width -= thousands_sep_len; + } + if (!loop_broken) { + /* We left the loop without using a break statement. */ + + l = MAX(MAX(remaining, min_width), 1); + n_zeros = MAX(0, l - remaining); + n_chars = MAX(0, MIN(remaining, l)); + + /* Use n_zero zero's and n_chars chars */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + if (buffer) { + /* Copy into the output buffer. */ + fill(&digits_end, &buffer_end, n_chars, n_zeros, + use_separator ? thousands_sep : NULL, thousands_sep_len); } - return 1; + } + return count; } /** * _Py_InsertThousandsGroupingLocale: * @buffer: A pointer to the start of a string. - * @n_buffer: The length of the string. * @n_digits: The number of digits in the string, in which we want * to put the grouping chars. - * @buf_size: The maximum size of the buffer pointed to by buffer. 
- * @count: If non-NULL, points to a variable that will receive the - * number of characters we need to insert (and no formatting - * will actually occur). - * @append_zero_char: If non-zero, put a trailing zero at the end of - * of the resulting string, if and only if we modified the - * string. * * Reads thee current locale and calls _Py_InsertThousandsGrouping(). **/ -int +Py_ssize_t _Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer, Py_ssize_t n_buffer, + STRINGLIB_CHAR *digits, Py_ssize_t n_digits, - Py_ssize_t buf_size, - Py_ssize_t *count, - int append_zero_char) + Py_ssize_t min_width) { struct lconv *locale_data = localeconv(); const char *grouping = locale_data->grouping; const char *thousands_sep = locale_data->thousands_sep; - return _Py_InsertThousandsGrouping(buffer, n_buffer, n_digits, - buf_size, count, - append_zero_char, grouping, - thousands_sep); + return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits, + min_width, grouping, thousands_sep); } #endif /* STRINGLIB_LOCALEUTIL_H */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f52c435..3cea899 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -8792,42 +8792,13 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) return NULL; } -static Py_ssize_t -strtounicode(Py_UNICODE *buffer, const char *charbuffer) +static void +strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len) { register Py_ssize_t i; - Py_ssize_t len = strlen(charbuffer); for (i = len - 1; i >= 0; i--) buffer[i] = (Py_UNICODE) charbuffer[i]; - - return len; -} - -static int -doubletounicode(Py_UNICODE *buffer, size_t len, const char *format, double x) -{ - Py_ssize_t result; - - PyOS_ascii_formatd((char *)buffer, len, format, x); - result = strtounicode(buffer, (char *)buffer); - return Py_SAFE_DOWNCAST(result, Py_ssize_t, int); -} - -#if 0 -static int -longtounicode(Py_UNICODE *buffer, size_t len, const char *format, long x) -{ - 
Py_ssize_t result; - - PyOS_snprintf((char *)buffer, len, format, x); - result = strtounicode(buffer, (char *)buffer); - return Py_SAFE_DOWNCAST(result, Py_ssize_t, int); } -#endif - -/* XXX To save some code duplication, formatfloat/long/int could have been - shared with stringobject.c, converting from 8-bit to Unicode after the - formatting is done. */ static int formatfloat(Py_UNICODE *buf, @@ -8837,54 +8808,59 @@ formatfloat(Py_UNICODE *buf, int type, PyObject *v) { - /* fmt = '%#.' + `prec` + `type` - worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ - char fmt[20]; + /* eric.smith: To minimize disturbances in PyUnicode_Format (the + only caller of this routine), I'm going to keep the existing + API to this function. That means that we'll allocate memory and + then copy back into the supplied buffer. But that's better than + all of the changes that would be required in PyUnicode_Format + because it does lots of memory management tricks. */ + + char* p = NULL; + int result = -1; double x; + Py_ssize_t len; x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) - return -1; + goto done; if (prec < 0) prec = 6; + /* make sure that the decimal representation of precision really does need at most 10 digits: platforms with sizeof(int) == 8 exist! */ if (prec > 0x7fffffffL) { PyErr_SetString(PyExc_OverflowError, "outrageously large precision " "for formatted float"); - return -1; + goto done; } if (type == 'f' && fabs(x) >= 1e50) type = 'g'; - /* Worst case length calc to ensure no buffer overrun: - - 'g' formats: - fmt = %#.g - buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp - for any double rep.) - len = 1 + prec + 1 + 2 + 5 = 9 + prec - - 'f' formats: - buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) - len = 1 + 50 + 1 + prec = 52 + prec - If prec=0 the effective precision is 1 (the leading digit is - always given), therefore increase the length by one. 
- - */ if (((type == 'g' || type == 'G') && buflen <= (size_t)10 + (size_t)prec) || - (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) { + ((type == 'f' || type == 'F') && + buflen <= (size_t)53 + (size_t)prec)) { PyErr_SetString(PyExc_OverflowError, "formatted float is too long (precision too large?)"); - return -1; + goto done; + } + + p = PyOS_double_to_string(x, type, prec, + (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); + len = strlen(p); + if (len+1 >= buflen) { + /* Caller supplied buffer is not large enough. */ + PyErr_NoMemory(); + goto done; } - PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", - (flags&F_ALT) ? "#" : "", - prec, type); - return doubletounicode(buf, buflen, fmt, x); + strtounicode(buf, p, len); + result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int); + +done: + PyMem_Free(p); + return result; } static PyObject* @@ -8903,84 +8879,6 @@ formatlong(PyObject *val, int flags, int prec, int type) return result; } -#if 0 -static int -formatint(Py_UNICODE *buf, - size_t buflen, - int flags, - int prec, - int type, - PyObject *v) -{ - /* fmt = '%#.' + `prec` + 'l' + `type` - * worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) - * + 1 + 1 - * = 24 - */ - char fmt[64]; /* plenty big enough! 
*/ - char *sign; - long x; - - x = PyLong_AsLong(v); - if (x == -1 && PyErr_Occurred()) - return -1; - if (x < 0 && type == 'u') { - type = 'd'; - } - if (x < 0 && (type == 'x' || type == 'X' || type == 'o')) - sign = "-"; - else - sign = ""; - if (prec < 0) - prec = 1; - - /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal)) - * worst case buf = '-0x' + [0-9]*prec, where prec >= 11 - */ - if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) { - PyErr_SetString(PyExc_OverflowError, - "formatted integer is too long (precision too large?)"); - return -1; - } - - if ((flags & F_ALT) && - (type == 'x' || type == 'X' || type == 'o')) { - /* When converting under %#o, %#x or %#X, there are a number - * of issues that cause pain: - * - for %#o, we want a different base marker than C - * - when 0 is being converted, the C standard leaves off - * the '0x' or '0X', which is inconsistent with other - * %#x/%#X conversions and inconsistent with Python's - * hex() function - * - there are platforms that violate the standard and - * convert 0 with the '0x' or '0X' - * (Metrowerks, Compaq Tru64) - * - there are platforms that give '0x' when converting - * under %#X, but convert 0 in accordance with the - * standard (OS/2 EMX) - * - * We can achieve the desired consistency by inserting our - * own '0x' or '0X' prefix, and substituting %x/%X in place - * of %#x/%#X. - * - * Note that this is the same approach as used in - * formatint() in stringobject.c - */ - PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", - sign, type, prec, type); - } - else { - PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", - sign, (flags&F_ALT) ? 
"#" : "", - prec, type); - } - if (sign[0]) - return longtounicode(buf, buflen, fmt, -x); - else - return longtounicode(buf, buflen, fmt, x); -} -#endif - static int formatchar(Py_UNICODE *buf, size_t buflen, @@ -9359,8 +9257,6 @@ PyObject *PyUnicode_Format(PyObject *format, case 'F': case 'g': case 'G': - if (c == 'F') - c = 'f'; pbuf = formatbuf; len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), flags, prec, c, v); diff --git a/Python/marshal.c b/Python/marshal.c index e5e5ce4..0d55132 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -236,12 +236,15 @@ w_object(PyObject *v, WFILE *p) w_string((char*)buf, 8, p); } else { - char buf[256]; /* Plenty to format any double */ - n = _PyFloat_Repr(PyFloat_AS_DOUBLE(v), - buf, sizeof(buf)); + char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), + 'r', 0, 0, NULL); + if (!buf) + return; + n = strlen(buf); w_byte(TYPE_FLOAT, p); w_byte((int)n, p); w_string(buf, (int)n, p); + PyMem_Free(buf); } } #ifndef WITHOUT_COMPLEX @@ -263,17 +266,24 @@ w_object(PyObject *v, WFILE *p) w_string((char*)buf, 8, p); } else { - char buf[256]; /* Plenty to format any double */ + char *buf; w_byte(TYPE_COMPLEX, p); - n = _PyFloat_Repr(PyComplex_RealAsDouble(v), - buf, sizeof(buf)); + buf = PyOS_double_to_string(PyComplex_RealAsDouble(v), + 'r', 0, 0, NULL); + if (!buf) + return; n = strlen(buf); w_byte((int)n, p); w_string(buf, (int)n, p); - n = _PyFloat_Repr(PyComplex_ImagAsDouble(v), - buf, sizeof(buf)); + PyMem_Free(buf); + buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v), + 'r', 0, 0, NULL); + if (!buf) + return; + n = strlen(buf); w_byte((int)n, p); w_string(buf, (int)n, p); + PyMem_Free(buf); } } #endif diff --git a/Python/pystrtod.c b/Python/pystrtod.c index b81abce..217246e 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -37,6 +37,38 @@ * * Return value: the #gdouble value. 
**/ + +#ifndef PY_NO_SHORT_FLOAT_REPR + +double +PyOS_ascii_strtod(const char *nptr, char **endptr) +{ + double result; + _Py_SET_53BIT_PRECISION_HEADER; + + assert(nptr != NULL); + /* Set errno to zero, so that we can distinguish zero results + and underflows */ + errno = 0; + + _Py_SET_53BIT_PRECISION_START; + result = _Py_dg_strtod(nptr, endptr); + _Py_SET_53BIT_PRECISION_END; + + return result; + +} + +#else + +/* + Use system strtod; since strtod is locale aware, we may + have to first fix the decimal separator. + + Note that unlike _Py_dg_strtod, the system strtod may not always give + correctly rounded results. +*/ + double PyOS_ascii_strtod(const char *nptr, char **endptr) { @@ -187,6 +219,15 @@ PyOS_ascii_strtod(const char *nptr, char **endptr) return val; } +#endif + +double +PyOS_ascii_atof(const char *nptr) +{ + return PyOS_ascii_strtod(nptr, NULL); +} + + /* Given a string that may have a decimal point in the current locale, change it back to a dot. Since the string cannot get longer, no need for a maximum buffer size parameter. */ @@ -292,8 +333,9 @@ ensure_minumim_exponent_length(char* buffer, size_t buf_size) } } -/* Ensure that buffer has a decimal point in it. The decimal point - will not be in the current locale, it will always be '.' */ +/* Ensure that buffer has a decimal point in it. The decimal point will not + be in the current locale, it will always be '.'. Don't add a decimal if an + exponent is present. */ Py_LOCAL_INLINE(void) ensure_decimal_point(char* buffer, size_t buf_size) { @@ -322,7 +364,8 @@ ensure_decimal_point(char* buffer, size_t buf_size) insert_count = 1; } } - else { + else if (!(*p == 'e' || *p == 'E')) { + /* Don't add ".0" if we have an exponent. */ chars_to_insert = ".0"; insert_count = 2; } @@ -341,37 +384,6 @@ ensure_decimal_point(char* buffer, size_t buf_size) } } -/* Add the locale specific grouping characters to buffer. Note - that any decimal point (if it's present) in buffer is already - locale-specific. 
Return 0 on error, else 1. */ -Py_LOCAL_INLINE(int) -add_thousands_grouping(char* buffer, size_t buf_size) -{ - Py_ssize_t len = strlen(buffer); - struct lconv *locale_data = localeconv(); - const char *decimal_point = locale_data->decimal_point; - - /* Find the decimal point, if any. We're only concerned - about the characters to the left of the decimal when - adding grouping. */ - char *p = strstr(buffer, decimal_point); - if (!p) { - /* No decimal, use the entire string. */ - - /* If any exponent, adjust p. */ - p = strpbrk(buffer, "eE"); - if (!p) - /* No exponent and no decimal. Use the entire - string. */ - p = buffer + len; - } - /* At this point, p points just past the right-most character we - want to format. We need to add the grouping string for the - characters between buffer and p. */ - return _PyBytes_InsertThousandsGroupingLocale(buffer, len, p-buffer, - buf_size, NULL, 1); -} - /* see FORMATBUFLEN in unicodeobject.c */ #define FLOAT_FORMATBUFLEN 120 @@ -386,9 +398,8 @@ add_thousands_grouping(char* buffer, size_t buf_size) * Converts a #gdouble to a string, using the '.' as * decimal point. To format the number you pass in * a printf()-style format string. Allowed conversion - * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'n'. + * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'. * - * 'n' is the same as 'g', except it uses the current locale. * 'Z' is the same as 'g', except it always has a decimal and * at least one digit after the decimal. * @@ -403,11 +414,6 @@ PyOS_ascii_formatd(char *buffer, char format_char; size_t format_len = strlen(format); - /* For type 'n', we need to make a copy of the format string, because - we're going to modify 'n' -> 'g', and format is const char*, so we - can't modify it directly. FLOAT_FORMATBUFLEN should be longer than - we ever need this to be. There's an upcoming check to ensure it's - big enough. */ /* Issue 2264: code 'Z' requires copying the format. 
'Z' is 'g', but also with at least one character past the decimal. */ char tmp_format[FLOAT_FORMATBUFLEN]; @@ -433,12 +439,12 @@ PyOS_ascii_formatd(char *buffer, if (!(format_char == 'e' || format_char == 'E' || format_char == 'f' || format_char == 'F' || format_char == 'g' || format_char == 'G' || - format_char == 'n' || format_char == 'Z')) + format_char == 'Z')) return NULL; - /* Map 'n' or 'Z' format_char to 'g', by copying the format string and + /* Map 'Z' format_char to 'g', by copying the format string and replacing the final char with a 'g' */ - if (format_char == 'n' || format_char == 'Z') { + if (format_char == 'Z') { if (format_len + 1 >= sizeof(tmp_format)) { /* The format won't fit in our copy. Error out. In practice, this will never happen and will be @@ -457,11 +463,8 @@ PyOS_ascii_formatd(char *buffer, /* Do various fixups on the return string */ /* Get the current locale, and find the decimal point string. - Convert that string back to a dot. Do not do this if using the - 'n' (number) format code, since we want to keep the localized - decimal point in that case. */ - if (format_char != 'n') - change_decimal_from_locale_to_dot(buffer); + Convert that string back to a dot. */ + change_decimal_from_locale_to_dot(buffer); /* If an exponent exists, ensure that the exponent is at least MIN_EXPONENT_DIGITS digits, providing the buffer is large enough @@ -475,16 +478,497 @@ PyOS_ascii_formatd(char *buffer, if (format_char == 'Z') ensure_decimal_point(buffer, buf_size); - /* If format_char is 'n', add the thousands grouping. */ - if (format_char == 'n') - if (!add_thousands_grouping(buffer, buf_size)) + return buffer; +} + +#ifdef PY_NO_SHORT_FLOAT_REPR + +/* The fallback code to use if _Py_dg_dtoa is not available. 
*/ + +PyAPI_FUNC(char *) PyOS_double_to_string(double val, + char format_code, + int precision, + int flags, + int *type) +{ + char buf[128]; + char format[32]; + Py_ssize_t len; + char *result; + char *p; + int t; + int upper = 0; + + /* Validate format_code, and map upper and lower case */ + switch (format_code) { + case 'e': /* exponent */ + case 'f': /* fixed */ + case 'g': /* general */ + break; + case 'E': + upper = 1; + format_code = 'e'; + break; + case 'F': + upper = 1; + format_code = 'f'; + break; + case 'G': + upper = 1; + format_code = 'g'; + break; + case 'r': /* repr format */ + /* Supplied precision is unused, must be 0. */ + if (precision != 0) { + PyErr_BadInternalCall(); + return NULL; + } + precision = 17; + format_code = 'g'; + break; + case 's': /* str format */ + /* Supplied precision is unused, must be 0. */ + if (precision != 0) { + PyErr_BadInternalCall(); return NULL; + } + precision = 12; + format_code = 'g'; + break; + default: + PyErr_BadInternalCall(); + return NULL; + } - return buffer; + /* Handle nan and inf. */ + if (Py_IS_NAN(val)) { + strcpy(buf, "nan"); + t = Py_DTST_NAN; + } else if (Py_IS_INFINITY(val)) { + if (copysign(1., val) == 1.) + strcpy(buf, "inf"); + else + strcpy(buf, "-inf"); + t = Py_DTST_INFINITE; + } else { + t = Py_DTST_FINITE; + + + if (flags & Py_DTSF_ADD_DOT_0) + format_code = 'Z'; + + PyOS_snprintf(format, 32, "%%%s.%i%c", (flags & Py_DTSF_ALT ? "#" : ""), precision, format_code); + PyOS_ascii_formatd(buf, sizeof(buf), format, val); + } + + len = strlen(buf); + + /* Add 1 for the trailing 0 byte. + Add 1 because we might need to make room for the sign. + */ + result = PyMem_Malloc(len + 2); + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + p = result; + + /* Never add sign for nan/inf, even if asked. */ + if (flags & Py_DTSF_SIGN && buf[0] != '-' && t == Py_DTST_FINITE) + *p++ = '+'; + + strcpy(p, buf); + + if (upper) { + /* Convert to upper case. 
*/ + char *p1; + for (p1 = p; *p1; p1++) + *p1 = toupper(*p1); + } + + if (type) + *type = t; + return result; } -double -PyOS_ascii_atof(const char *nptr) +#else + +/* _Py_dg_dtoa is available. */ + +/* I'm using a lookup table here so that I don't have to invent a non-locale + specific way to convert to uppercase */ +#define OFS_INF 0 +#define OFS_NAN 1 +#define OFS_E 2 + +/* The lengths of these are known to the code below, so don't change them */ +static char *lc_float_strings[] = { + "inf", + "nan", + "e", +}; +static char *uc_float_strings[] = { + "INF", + "NAN", + "E", +}; + + +/* Convert a double d to a string, and return a PyMem_Malloc'd block of + memory containing the resulting string. + + Arguments: + d is the double to be converted + format_code is one of 'e', 'f', 'g', 'r' or 's'. 'e', 'f' and 'g' + correspond to '%e', '%f' and '%g'; 'r' and 's' correspond + to repr and str. + mode is one of '0', '2' or '3', and is completely determined by + format_code: 'e', 'g' and 's' use mode 2; 'f' mode 3, 'r' mode 0. + precision is the desired precision + always_add_sign is nonzero if a '+' sign should be included for positive + numbers + add_dot_0_if_integer is nonzero if integers in non-exponential form + should have ".0" added. Only applies to format codes 'r', 's', and 'g'. + use_alt_formatting is nonzero if alternative formatting should be + used. Only applies to format codes 'e', 'f' and 'g'. + type, if non-NULL, will be set to one of these constants to identify + the type of the 'd' argument: + Py_DTST_FINITE + Py_DTST_INFINITE + Py_DTST_NAN + + Returns a PyMem_Malloc'd block of memory containing the resulting string, + or NULL on error. If NULL is returned, the Python error has been set.
+ */ + +static char * +format_float_short(double d, char format_code, + int mode, Py_ssize_t precision, + int always_add_sign, int add_dot_0_if_integer, + int use_alt_formatting, char **float_strings, int *type) { - return PyOS_ascii_strtod(nptr, NULL); + char *buf = NULL; + char *p = NULL; + Py_ssize_t bufsize = 0; + char *digits, *digits_end; + int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0; + Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end; + _Py_SET_53BIT_PRECISION_HEADER; + + /* _Py_dg_dtoa returns a digit string (no decimal point or exponent). + Must be matched by a call to _Py_dg_freedtoa. */ + _Py_SET_53BIT_PRECISION_START; + digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign, + &digits_end); + _Py_SET_53BIT_PRECISION_END; + + decpt = (Py_ssize_t)decpt_as_int; + if (digits == NULL) { + /* The only failure mode is no memory. */ + PyErr_NoMemory(); + goto exit; + } + assert(digits_end != NULL && digits_end >= digits); + digits_len = digits_end - digits; + + if (digits_len && !isdigit(digits[0])) { + /* Infinities and nans here; adapt Gay's output, + so convert Infinity to inf and NaN to nan, and + ignore sign of nan. Then return. */ + + /* We only need 5 bytes to hold the result "+inf\0" . */ + bufsize = 5; /* Used later in an assert. 
*/ + buf = (char *)PyMem_Malloc(bufsize); + if (buf == NULL) { + PyErr_NoMemory(); + goto exit; + } + p = buf; + + if (digits[0] == 'i' || digits[0] == 'I') { + if (sign == 1) { + *p++ = '-'; + } + else if (always_add_sign) { + *p++ = '+'; + } + strncpy(p, float_strings[OFS_INF], 3); + p += 3; + + if (type) + *type = Py_DTST_INFINITE; + } + else if (digits[0] == 'n' || digits[0] == 'N') { + /* note that we *never* add a sign for a nan, + even if one has explicitly been requested */ + strncpy(p, float_strings[OFS_NAN], 3); + p += 3; + + if (type) + *type = Py_DTST_NAN; + } + else { + /* shouldn't get here: Gay's code should always return + something starting with a digit, an 'I', or 'N' */ + strncpy(p, "ERR", 3); + p += 3; + assert(0); + } + goto exit; + } + + /* The result must be finite (not inf or nan). */ + if (type) + *type = Py_DTST_FINITE; + + + /* We got digits back, format them. We may need to pad 'digits' + either on the left or right (or both) with extra zeros, so in + general the resulting string has the form + + [<sign>]<zeros><digits><zeros>[<exponent>] + + where either of the <zeros> pieces could be empty, and there's a + decimal point that could appear either in <digits> or in the + leading or trailing <zeros>. + + Imagine an infinite 'virtual' string vdigits, consisting of the + string 'digits' (starting at index 0) padded on both the left and + right with infinite strings of zeros. We want to output a slice + + vdigits[vdigits_start : vdigits_end] + + of this virtual string. Thus if vdigits_start < 0 then we'll end + up producing some leading zeros; if vdigits_end > digits_len there + will be trailing zeros in the output. The next section of code + determines whether to use an exponent or not, figures out the + position 'decpt' of the decimal point, and computes 'vdigits_start' + and 'vdigits_end'.
*/ + vdigits_end = digits_len; + switch (format_code) { + case 'e': + use_exp = 1; + vdigits_end = precision; + break; + case 'f': + vdigits_end = decpt + precision; + break; + case 'g': + if (decpt <= -4 || decpt > precision) + use_exp = 1; + if (use_alt_formatting) + vdigits_end = precision; + break; + case 'r': + /* convert to exponential format at 1e16. We used to convert + at 1e17, but that gives odd-looking results for some values + when a 16-digit 'shortest' repr is padded with bogus zeros. + For example, repr(2e16+8) would give 20000000000000010.0; + the true value is 20000000000000008.0. */ + if (decpt <= -4 || decpt > 16) + use_exp = 1; + break; + case 's': + /* if we're forcing a digit after the point, convert to + exponential format at 1e11. If not, convert at 1e12. */ + if (decpt <= -4 || decpt > + (add_dot_0_if_integer ? precision-1 : precision)) + use_exp = 1; + break; + default: + PyErr_BadInternalCall(); + goto exit; + } + + /* if using an exponent, reset decimal point position to 1 and adjust + exponent accordingly.*/ + if (use_exp) { + exp = decpt - 1; + decpt = 1; + } + /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start < + decpt < vdigits_end if add_dot_0_if_integer and no exponent */ + vdigits_start = decpt <= 0 ? decpt-1 : 0; + if (!use_exp && add_dot_0_if_integer) + vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1; + else + vdigits_end = vdigits_end > decpt ? vdigits_end : decpt; + + /* double check inequalities */ + assert(vdigits_start <= 0 && + 0 <= digits_len && + digits_len <= vdigits_end); + /* decimal point should be in (vdigits_start, vdigits_end] */ + assert(vdigits_start < decpt && decpt <= vdigits_end); + + /* Compute an upper bound how much memory we need. This might be a few + chars too long, but no big deal. 
*/ + bufsize = + /* sign, decimal point and trailing 0 byte */ + 3 + + + /* total digit count (including zero padding on both sides) */ + (vdigits_end - vdigits_start) + + + /* exponent "e+100", max 3 numerical digits */ + (use_exp ? 5 : 0); + + /* Now allocate the memory and initialize p to point to the start of + it. */ + buf = (char *)PyMem_Malloc(bufsize); + if (buf == NULL) { + PyErr_NoMemory(); + goto exit; + } + p = buf; + + /* Add a negative sign if negative, and a plus sign if non-negative + and always_add_sign is true. */ + if (sign == 1) + *p++ = '-'; + else if (always_add_sign) + *p++ = '+'; + + /* note that exactly one of the three 'if' conditions is true, + so we include exactly one decimal point */ + /* Zero padding on left of digit string */ + if (decpt <= 0) { + memset(p, '0', decpt-vdigits_start); + p += decpt - vdigits_start; + *p++ = '.'; + memset(p, '0', 0-decpt); + p += 0-decpt; + } + else { + memset(p, '0', 0-vdigits_start); + p += 0 - vdigits_start; + } + + /* Digits, with included decimal point */ + if (0 < decpt && decpt <= digits_len) { + strncpy(p, digits, decpt-0); + p += decpt-0; + *p++ = '.'; + strncpy(p, digits+decpt, digits_len-decpt); + p += digits_len-decpt; + } + else { + strncpy(p, digits, digits_len); + p += digits_len; + } + + /* And zeros on the right */ + if (digits_len < decpt) { + memset(p, '0', decpt-digits_len); + p += decpt-digits_len; + *p++ = '.'; + memset(p, '0', vdigits_end-decpt); + p += vdigits_end-decpt; + } + else { + memset(p, '0', vdigits_end-digits_len); + p += vdigits_end-digits_len; + } + + /* Delete a trailing decimal pt unless using alternative formatting. */ + if (p[-1] == '.' && !use_alt_formatting) + p--; + + /* Now that we've done zero padding, add an exponent if needed. */ + if (use_exp) { + *p++ = float_strings[OFS_E][0]; + exp_len = sprintf(p, "%+.02d", exp); + p += exp_len; + } + exit: + if (buf) { + *p = '\0'; + /* It's too late if this fails, as we've already stepped on + memory that isn't ours. 
But it's an okay debugging test. */ + assert(p-buf < bufsize); + } + if (digits) + _Py_dg_freedtoa(digits); + + return buf; +} + + +PyAPI_FUNC(char *) PyOS_double_to_string(double val, + char format_code, + int precision, + int flags, + int *type) +{ + char lc_format_code = format_code; + char** float_strings = lc_float_strings; + int mode = 0; + + /* Validate format_code, and map upper and lower case */ + switch (format_code) { + case 'e': /* exponent */ + case 'f': /* fixed */ + case 'g': /* general */ + case 'r': /* repr format */ + case 's': /* str format */ + break; + case 'E': + lc_format_code = 'e'; + break; + case 'F': + lc_format_code = 'f'; + break; + case 'G': + lc_format_code = 'g'; + break; + default: + PyErr_BadInternalCall(); + return NULL; + } + + if (format_code != lc_format_code) + float_strings = uc_float_strings; + + /* From the format code, compute the mode and make any adjustments as + needed. */ + switch (lc_format_code) { + case 'e': + mode = 2; + precision++; + break; + case 'f': + mode = 3; + break; + case 'g': + mode = 2; + /* precision 0 makes no sense for 'g' format; interpret as 1 */ + if (precision == 0) + precision = 1; + break; + case 'r': + /* "repr" pseudo-mode */ + mode = 0; + /* Supplied precision is unused, must be 0. */ + if (precision != 0) { + PyErr_BadInternalCall(); + return NULL; + } + break; + case 's': + mode = 2; + /* Supplied precision is unused, must be 0. */ + if (precision != 0) { + PyErr_BadInternalCall(); + return NULL; + } + precision = 12; + break; + } + + return format_float_short(val, lc_format_code, mode, precision, + flags & Py_DTSF_SIGN, + flags & Py_DTSF_ADD_DOT_0, + flags & Py_DTSF_ALT, + float_strings, type); } +#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */ -- cgit v0.12