summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Mark Dickinson <dickinsm@gmail.com>, 2009-05-01 11:42:00 (GMT)
committer: Mark Dickinson <dickinsm@gmail.com>, 2009-05-01 11:42:00 (GMT)
commit: f489caf5daa2b0f3a1bd951b585c834aab1a54c6 (patch)
tree: f57e0339acc0f5b702743d438738ad2f0d2aeba7
parent: fb526ac34af13116733fcd0a306016e253d90c08 (diff)
download: cpython-f489caf5daa2b0f3a1bd951b585c834aab1a54c6.zip
download: cpython-f489caf5daa2b0f3a1bd951b585c834aab1a54c6.tar.gz
download: cpython-f489caf5daa2b0f3a1bd951b585c834aab1a54c6.tar.bz2
Issue #5859: Remove use of fixed-length buffers for float formatting
in unicodeobject.c and the fallback version of PyOS_double_to_string. As a result, operations like '%.120e' % 12.34 no longer raise an exception.
-rw-r--r-- Lib/test/string_tests.py 9
-rw-r--r-- Misc/NEWS 3
-rw-r--r-- Objects/unicodeobject.c 84
-rw-r--r-- Python/pystrtod.c 94
4 files changed, 91 insertions, 99 deletions
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
index 2a58e58..1637efb 100644
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1105,14 +1105,7 @@ class MixinStrUnicodeUserStringTest:
value = 0.01
for x in range(60):
value = value * 3.141592655 / 3.0 * 10.0
- # The formatfloat() code in stringobject.c and
- # unicodeobject.c uses a 120 byte buffer and switches from
- # 'f' formatting to 'g' at precision 50, so we expect
- # OverflowErrors for the ranges x < 50 and prec >= 67.
- if x < 50 and prec >= 67:
- self.checkraises(OverflowError, format, "__mod__", value)
- else:
- self.checkcall(format, "__mod__", value)
+ self.checkcall(format, "__mod__", value)
def test_inplace_rewrites(self):
# Check that strings don't copy and modify cached single-character strings
diff --git a/Misc/NEWS b/Misc/NEWS
index 257762b..3e8f5e6 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
Core and Builtins
-----------------
+- Issue #5859: Remove length restrictions for float formatting:
+ '%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception.
+
- Issue #1588: Add complex.__format__. For example,
format(complex(1, 2./3), '.5') now produces a sensible result.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3cea899..31b9a73 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
return NULL;
}
-static void
-strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
-{
- register Py_ssize_t i;
- for (i = len - 1; i >= 0; i--)
- buffer[i] = (Py_UNICODE) charbuffer[i];
-}
+/* Returns a new reference to a PyUnicode object, or NULL on failure. */
-static int
-formatfloat(Py_UNICODE *buf,
- size_t buflen,
- int flags,
- int prec,
- int type,
- PyObject *v)
-{
- /* eric.smith: To minimize disturbances in PyUnicode_Format (the
- only caller of this routine), I'm going to keep the existing
- API to this function. That means that we'll allocate memory and
- then copy back into the supplied buffer. But that's better than
- all of the changes that would be required in PyUnicode_Format
- because it does lots of memory management tricks. */
-
- char* p = NULL;
- int result = -1;
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
+{
+ char *p;
+ PyObject *result;
double x;
- Py_ssize_t len;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred())
- goto done;
+ return NULL;
+
if (prec < 0)
prec = 6;
- /* make sure that the decimal representation of precision really does
- need at most 10 digits: platforms with sizeof(int) == 8 exist! */
- if (prec > 0x7fffffffL) {
- PyErr_SetString(PyExc_OverflowError,
- "outrageously large precision "
- "for formatted float");
- goto done;
- }
-
if (type == 'f' && fabs(x) >= 1e50)
type = 'g';
- if (((type == 'g' || type == 'G') &&
- buflen <= (size_t)10 + (size_t)prec) ||
- ((type == 'f' || type == 'F') &&
- buflen <= (size_t)53 + (size_t)prec)) {
- PyErr_SetString(PyExc_OverflowError,
- "formatted float is too long (precision too large?)");
- goto done;
- }
-
p = PyOS_double_to_string(x, type, prec,
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
- len = strlen(p);
- if (len+1 >= buflen) {
- /* Caller supplied buffer is not large enough. */
- PyErr_NoMemory();
- goto done;
- }
- strtounicode(buf, p, len);
- result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
-
-done:
+ if (p == NULL)
+ return NULL;
+ result = PyUnicode_FromStringAndSize(p, strlen(p));
PyMem_Free(p);
return result;
}
@@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf,
}
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
- FORMATBUFLEN is the length of the buffer in which the floats, ints, &
- chars are formatted. XXX This is a magic number. Each formatting
- routine does bounds checking to ensure no overflow, but a better
- solution may be to malloc a buffer of appropriate size for each
- format. For now, the current solution is sufficient.
+ FORMATBUFLEN is the length of the buffer in which chars are formatted.
*/
-#define FORMATBUFLEN (size_t)120
+#define FORMATBUFLEN (size_t)10
PyObject *PyUnicode_Format(PyObject *format,
PyObject *args)
@@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format,
Py_UNICODE *pbuf;
Py_UNICODE sign;
Py_ssize_t len;
- Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+ Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */
fmt++;
if (*fmt == '(') {
@@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format,
case 'F':
case 'g':
case 'G':
- pbuf = formatbuf;
- len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
- flags, prec, c, v);
- if (len < 0)
+ temp = formatfloat(v, flags, prec, c);
+ if (!temp)
goto onError;
+ pbuf = PyUnicode_AS_UNICODE(temp);
+ len = PyUnicode_GET_SIZE(temp);
sign = 1;
if (flags & F_ZERO)
fill = '0';
diff --git a/Python/pystrtod.c b/Python/pystrtod.c
index e68f5d7..1040610 100644
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
int flags,
int *type)
{
- char buf[128];
char format[32];
- Py_ssize_t len;
- char *result;
- char *p;
- int t;
+ Py_ssize_t bufsize;
+ char *buf;
+ int t, exp;
int upper = 0;
/* Validate format_code, and map upper and lower case */
@@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
return NULL;
}
+ /* Here's a quick-and-dirty calculation to figure out how big a buffer
+ we need. In general, for a finite float we need:
+
+ 1 byte for each digit of the decimal significand, and
+
+ 1 for a possible sign
+ 1 for a possible decimal point
+ 2 for a possible [eE][+-]
+ 1 for each digit of the exponent; if we allow 19 digits
+ total then we're safe up to exponents of 2**63.
+ 1 for the trailing nul byte
+
+ This gives a total of 24 + the number of digits in the significand,
+ and the number of digits in the significand is:
+
+ for 'g' format: at most precision, except possibly
+ when precision == 0, when it's 1.
+ for 'e' format: precision+1
+ for 'f' format: precision digits after the point, at least 1
+ before. To figure out how many digits appear before the point
+ we have to examine the size of the number. If fabs(val) < 1.0
+ then there will be only one digit before the point. If
+ fabs(val) >= 1.0, then there are at most
+
+ 1+floor(log10(ceiling(fabs(val))))
+
+ digits before the point (where the 'ceiling' allows for the
+ possibility that the rounding rounds the integer part of val
+ up). A safe upper bound for the above quantity is
+ 1+floor(exp/3), where exp is the unique integer such that 0.5
+ <= fabs(val)/2**exp < 1.0. This exp can be obtained from
+ frexp.
+
+ So we allow room for precision+1 digits for all formats, plus an
+ extra floor(exp/3) digits for 'f' format.
+
+ */
+
+ if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
+ /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
+ bufsize = 5;
+ else {
+ bufsize = 25 + precision;
+ if (format_code == 'f' && fabs(val) >= 1.0) {
+ frexp(val, &exp);
+ bufsize += exp/3;
+ }
+ }
+
+ buf = PyMem_Malloc(bufsize);
+ if (buf == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
/* Handle nan and inf. */
if (Py_IS_NAN(val)) {
strcpy(buf, "nan");
@@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
(flags & Py_DTSF_ALT ? "#" : ""), precision,
format_code);
- _PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
- }
-
- len = strlen(buf);
-
- /* Add 1 for the trailing 0 byte.
- Add 1 because we might need to make room for the sign.
- */
- result = PyMem_Malloc(len + 2);
- if (result == NULL) {
- PyErr_NoMemory();
- return NULL;
+ _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
}
- p = result;
/* Add sign when requested. It's convenient (esp. when formatting
complex numbers) to include a sign even for inf and nan. */
- if (flags & Py_DTSF_SIGN && buf[0] != '-')
- *p++ = '+';
-
- strcpy(p, buf);
-
+ if (flags & Py_DTSF_SIGN && buf[0] != '-') {
+ size_t len = strlen(buf);
+ /* the bufsize calculations above should ensure that we've got
+ space to add a sign */
+ assert((size_t)bufsize >= len+2);
+ memmove(buf+1, buf, len+1);
+ buf[0] = '+';
+ }
if (upper) {
/* Convert to upper case. */
char *p1;
- for (p1 = p; *p1; p1++)
+ for (p1 = buf; *p1; p1++)
*p1 = Py_TOUPPER(*p1);
}
if (type)
*type = t;
- return result;
+ return buf;
}
#else