From 38fd5b641366eedc74e4be3a0e4d2210f3bcdb5a Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 21 Sep 2000 05:43:11 +0000 Subject: Derived from Martin's SF patch 110609: support unbounded ints in %d,i,u,x,X,o formats. Note a curious extension to the std C rules: x, X and o formatting can never produce a sign character in C, so the '+' and ' ' flags are meaningless for them. But unbounded ints *can* produce a sign character under these conversions (no fixed-width bitstring is wide enough to hold all negative values in 2's-comp form). So these flags become meaningful in Python when formatting a Python long which is too big to fit in a C long. This required shuffling around existing code, which hacked x and X conversions to death when both the '#' and '0' flags were specified: the hacks weren't strong enough to deal with the simultaneous possibility of the ' ' or '+' flags too, since signs were always meaningless before for x and X conversions. Isomorphic shuffling was required in unicodeobject.c. Also added dozens of non-trivial new unbounded-int test cases to test_format.py. 
--- Include/stringobject.h | 2 + Lib/test/test_format.py | 159 ++++++++++++++++++++++++++++----- Objects/stringobject.c | 231 ++++++++++++++++++++++++++++++++++++++++++------ Objects/unicodeobject.c | 92 ++++++++++++++----- 4 files changed, 409 insertions(+), 75 deletions(-) diff --git a/Include/stringobject.h b/Include/stringobject.h index 3bba7bc..85ac0b6 100644 --- a/Include/stringobject.h +++ b/Include/stringobject.h @@ -59,6 +59,8 @@ extern DL_IMPORT(void) PyString_Concat(PyObject **, PyObject *); extern DL_IMPORT(void) PyString_ConcatAndDel(PyObject **, PyObject *); extern DL_IMPORT(int) _PyString_Resize(PyObject **, int); extern DL_IMPORT(PyObject *) PyString_Format(PyObject *, PyObject *); +extern DL_IMPORT(PyObject *) _PyString_FormatLong(PyObject*, int, int, + int, char**, int*); #ifdef INTERN_STRINGS extern DL_IMPORT(void) PyString_InternInPlace(PyObject **); diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index 9be9c76..266cbb6 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -6,32 +6,36 @@ import string, sys # they crash python) # test on unicode strings as well +overflowok = 1 + def testformat(formatstr, args, output=None): - if verbose: - if output: - print "%s %% %s =? %s ..." %\ - (repr(formatstr), repr(args), repr(output)), - else: - print "%s %% %s works? ..." % (repr(formatstr), repr(args)), - try: - result = formatstr % args - except OverflowError: - if verbose: - print 'overflow (this is fine)' - else: - if output and result != output: - if verbose: - print 'no' - print "%s %% %s == %s != %s" %\ - (repr(formatstr), repr(args), repr(result), repr(output)) - else: - if verbose: - print 'yes' + if verbose: + if output: + print "%s %% %s =? %s ..." %\ + (repr(formatstr), repr(args), repr(output)), + else: + print "%s %% %s works? ..." 
% (repr(formatstr), repr(args)), + try: + result = formatstr % args + except OverflowError: + if not overflowok: + raise + if verbose: + print 'overflow (this is fine)' + else: + if output and result != output: + if verbose: + print 'no' + print "%s %% %s == %s != %s" %\ + (repr(formatstr), repr(args), repr(result), repr(output)) + else: + if verbose: + print 'yes' def testboth(formatstr, *args): - testformat(formatstr, *args) - testformat(unicode(formatstr), *args) - + testformat(formatstr, *args) + testformat(unicode(formatstr), *args) + testboth("%.1d", (1,), "1") testboth("%.*d", (sys.maxint,1)) # expect overflow @@ -50,3 +54,112 @@ testboth("%#.*g", (110, -1.e+100/3.)) # test some ridiculously large precision, expect overflow testboth('%12.*f', (123456, 1.0)) +# Formatting of long integers. Overflow is not ok +overflowok = 0 +testboth("%x", 10L, "a") +testboth("%x", 100000000000L, "174876e800") +testboth("%o", 10L, "12") +testboth("%o", 100000000000L, "1351035564000") +testboth("%d", 10L, "10") +testboth("%d", 100000000000L, "100000000000") + +# Make sure big is too big to fit in a 64-bit int, else the unbounded +# int formatting will be sidestepped on some machines. That's vital, +# because bitwise (x, X, o) formats of regular Python ints never +# produce a sign ("+" or "-"). 
+ +big = 123456789012345678901234567890L +testboth("%d", big, "123456789012345678901234567890") +testboth("%d", -big, "-123456789012345678901234567890") +testboth("%5d", -big, "-123456789012345678901234567890") +testboth("%31d", -big, "-123456789012345678901234567890") +testboth("%32d", -big, " -123456789012345678901234567890") +testboth("%-32d", -big, "-123456789012345678901234567890 ") +testboth("%032d", -big, "-0123456789012345678901234567890") +testboth("%-032d", -big, "-123456789012345678901234567890 ") +testboth("%034d", -big, "-000123456789012345678901234567890") +testboth("%034d", big, "0000123456789012345678901234567890") +testboth("%0+34d", big, "+000123456789012345678901234567890") +testboth("%+34d", big, " +123456789012345678901234567890") +testboth("%34d", big, " 123456789012345678901234567890") +testboth("%.2d", big, "123456789012345678901234567890") +testboth("%.30d", big, "123456789012345678901234567890") +testboth("%.31d", big, "0123456789012345678901234567890") +testboth("%32.31d", big, " 0123456789012345678901234567890") + +big = 0x1234567890abcdef12345L # 21 hex digits +testboth("%x", big, "1234567890abcdef12345") +testboth("%x", -big, "-1234567890abcdef12345") +testboth("%5x", -big, "-1234567890abcdef12345") +testboth("%22x", -big, "-1234567890abcdef12345") +testboth("%23x", -big, " -1234567890abcdef12345") +testboth("%-23x", -big, "-1234567890abcdef12345 ") +testboth("%023x", -big, "-01234567890abcdef12345") +testboth("%-023x", -big, "-1234567890abcdef12345 ") +testboth("%025x", -big, "-0001234567890abcdef12345") +testboth("%025x", big, "00001234567890abcdef12345") +testboth("%0+25x", big, "+0001234567890abcdef12345") +testboth("%+25x", big, " +1234567890abcdef12345") +testboth("%25x", big, " 1234567890abcdef12345") +testboth("%.2x", big, "1234567890abcdef12345") +testboth("%.21x", big, "1234567890abcdef12345") +testboth("%.22x", big, "01234567890abcdef12345") +testboth("%23.22x", big, " 01234567890abcdef12345") +testboth("%-23.22x", big, 
"01234567890abcdef12345 ") +testboth("%X", big, "1234567890ABCDEF12345") +testboth("%#X", big, "0X1234567890ABCDEF12345") +testboth("%#x", big, "0x1234567890abcdef12345") +testboth("%#x", -big, "-0x1234567890abcdef12345") +testboth("%#.23x", -big, "-0x001234567890abcdef12345") +testboth("%#+.23x", big, "+0x001234567890abcdef12345") +testboth("%# .23x", big, " 0x001234567890abcdef12345") +testboth("%#+.23X", big, "+0X001234567890ABCDEF12345") +testboth("%#-+.23X", big, "+0X001234567890ABCDEF12345") +testboth("%#-+26.23X", big, "+0X001234567890ABCDEF12345") +testboth("%#-+27.23X", big, "+0X001234567890ABCDEF12345 ") +testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345") +# next one gets two leading zeroes from precision, and another from the +# 0 flag and the width +testboth("%#+027.23X", big, "+0X0001234567890ABCDEF12345") +# same, except no 0 flag +testboth("%#+27.23X", big, " +0X001234567890ABCDEF12345") + +big = 012345670123456701234567012345670L # 32 octal digits +testboth("%o", big, "12345670123456701234567012345670") +testboth("%o", -big, "-12345670123456701234567012345670") +testboth("%5o", -big, "-12345670123456701234567012345670") +testboth("%33o", -big, "-12345670123456701234567012345670") +testboth("%34o", -big, " -12345670123456701234567012345670") +testboth("%-34o", -big, "-12345670123456701234567012345670 ") +testboth("%034o", -big, "-012345670123456701234567012345670") +testboth("%-034o", -big, "-12345670123456701234567012345670 ") +testboth("%036o", -big, "-00012345670123456701234567012345670") +testboth("%036o", big, "000012345670123456701234567012345670") +testboth("%0+36o", big, "+00012345670123456701234567012345670") +testboth("%+36o", big, " +12345670123456701234567012345670") +testboth("%36o", big, " 12345670123456701234567012345670") +testboth("%.2o", big, "12345670123456701234567012345670") +testboth("%.32o", big, "12345670123456701234567012345670") +testboth("%.33o", big, "012345670123456701234567012345670") +testboth("%34.33o", big, " 
012345670123456701234567012345670") +testboth("%-34.33o", big, "012345670123456701234567012345670 ") +testboth("%o", big, "12345670123456701234567012345670") +testboth("%#o", big, "012345670123456701234567012345670") +testboth("%#o", -big, "-012345670123456701234567012345670") +testboth("%#.34o", -big, "-0012345670123456701234567012345670") +testboth("%#+.34o", big, "+0012345670123456701234567012345670") +testboth("%# .34o", big, " 0012345670123456701234567012345670") +testboth("%#+.34o", big, "+0012345670123456701234567012345670") +testboth("%#-+.34o", big, "+0012345670123456701234567012345670") +testboth("%#-+37.34o", big, "+0012345670123456701234567012345670 ") +testboth("%#+37.34o", big, " +0012345670123456701234567012345670") +# next one gets one leading zero from precision +testboth("%.33o", big, "012345670123456701234567012345670") +# base marker shouldn't change that, since "0" is redundant +testboth("%#.33o", big, "012345670123456701234567012345670") +# but reduce precision, and base marker should add a zero +testboth("%#.32o", big, "012345670123456701234567012345670") +# one leading zero from precision, and another from "0" flag & width +testboth("%034.33o", big, "0012345670123456701234567012345670") +# base marker shouldn't change that +testboth("%0#34.33o", big, "0012345670123456701234567012345670") diff --git a/Objects/stringobject.c b/Objects/stringobject.c index cadca16..acae880 100644 --- a/Objects/stringobject.c +++ b/Objects/stringobject.c @@ -2427,6 +2427,13 @@ getnextarg(PyObject *args, int arglen, int *p_argidx) return NULL; } +/* Format codes + * F_LJUST '-' + * F_SIGN '+' + * F_BLANK ' ' + * F_ALT '#' + * F_ZERO '0' + */ #define F_LJUST (1<<0) #define F_SIGN (1<<1) #define F_BLANK (1<<2) @@ -2464,22 +2471,164 @@ formatfloat(char *buf, size_t buflen, int flags, return strlen(buf); } +/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and + * the F_ALT flag, for Python's long (unbounded) ints. 
It's not used for + * Python's regular ints. + * Return value: a new PyString*, or NULL if error. + * . *pbuf is set to point into it, + * *plen set to the # of chars following that. + * Caller must decref it when done using pbuf. + * The string starting at *pbuf is of the form + * "-"? ("0x" | "0X")? digit+ + * "0x"/"0X" are present only for x and X conversions, with F_ALT + * set in flags. The case of hex digits will be correct, + * There will be at least prec digits, zero-filled on the left if + * necessary to get that many. + * val object to be converted + * flags bitmask of format flags; only F_ALT is looked at + * prec minimum number of digits; 0-fill on left if needed + * type a character in [duoxX]; u acts the same as d + * + * CAUTION: o, x and X conversions on regular ints can never + * produce a '-' sign, but can for Python's unbounded ints. + */ +PyObject* +_PyString_FormatLong(PyObject *val, int flags, int prec, int type, + char **pbuf, int *plen) +{ + PyObject *result = NULL; + char *buf; + int i; + int sign; /* 1 if '-', else 0 */ + int len; /* number of characters */ + int numdigits; /* len == numnondigits + numdigits */ + int numnondigits = 0; + + switch (type) { + case 'd': + case 'u': + result = val->ob_type->tp_str(val); + break; + case 'o': + result = val->ob_type->tp_as_number->nb_oct(val); + break; + case 'x': + case 'X': + numnondigits = 2; + result = val->ob_type->tp_as_number->nb_hex(val); + break; + default: + assert(!"'type' not in [duoxX]"); + } + if (!result) + return NULL; + + /* To modify the string in-place, there can only be one reference. 
*/ + if (result->ob_refcnt != 1) { + PyErr_BadInternalCall(); + return NULL; + } + buf = PyString_AsString(result); + len = PyString_Size(result); + if (buf[len-1] == 'L') { + --len; + buf[len] = '\0'; + } + sign = buf[0] == '-'; + numnondigits += sign; + numdigits = len - numnondigits; + assert(numdigits > 0); + + /* Get rid of base marker unless F_ALT */ + if ((flags & F_ALT) == 0) { + /* Need to skip 0x, 0X or 0. */ + int skipped = 0; + switch (type) { + case 'o': + assert(buf[sign] == '0'); + /* If 0 is only digit, leave it alone. */ + if (numdigits > 1) { + skipped = 1; + --numdigits; + } + break; + case 'x': + case 'X': + assert(buf[sign] == '0'); + assert(buf[sign + 1] == 'x'); + skipped = 2; + numnondigits -= 2; + break; + } + if (skipped) { + buf += skipped; + len -= skipped; + if (sign) + buf[0] = '-'; + } + assert(len == numnondigits + numdigits); + assert(numdigits > 0); + } + + /* Fill with leading zeroes to meet minimum width. */ + if (prec > numdigits) { + PyObject *r1 = PyString_FromStringAndSize(NULL, + numnondigits + prec); + char *b1; + if (!r1) { + Py_DECREF(result); + return NULL; + } + b1 = PyString_AS_STRING(r1); + for (i = 0; i < numnondigits; ++i) + *b1++ = *buf++; + for (i = 0; i < prec - numdigits; i++) + *b1++ = '0'; + for (i = 0; i < numdigits; i++) + *b1++ = *buf++; + *b1 = '\0'; + Py_DECREF(result); + result = r1; + buf = PyString_AS_STRING(result); + len = numnondigits + prec; + } + + /* Fix up case for hex conversions. */ + switch (type) { + case 'x': + /* Need to convert all upper case letters to lower case. */ + for (i = 0; i < len; i++) + if (buf[i] >= 'A' && buf[i] <= 'F') + buf[i] += 'a'-'A'; + break; + case 'X': + /* Need to convert 0x to 0X (and -0x to -0X). */ + if (buf[sign + 1] == 'x') + buf[sign + 1] = 'X'; + break; + } + *pbuf = buf; + *plen = len; + return result; +} + static int formatint(char *buf, size_t buflen, int flags, int prec, int type, PyObject *v) { /* fmt = '%#.' 
+ `prec` + 'l' + `type` - worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/ - char fmt[20]; + worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) + + 1 + 1 = 24 */ + char fmt[64]; /* plenty big enough! */ long x; if (!PyArg_Parse(v, "l;int argument required", &x)) return -1; if (prec < 0) prec = 1; sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type); - /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal)) + /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal)) worst case buf = '0x' + [0-9]*prec, where prec >= 11 */ - if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) { + if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) { PyErr_SetString(PyExc_OverflowError, "formatted integer is too long (precision too long?)"); return -1; @@ -2752,25 +2901,29 @@ PyString_Format(PyObject *format, PyObject *args) case 'X': if (c == 'i') c = 'd'; - pbuf = formatbuf; - len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v); - if (len < 0) - goto error; - sign = (c == 'd'); - if (flags&F_ZERO) { - fill = '0'; - if ((flags&F_ALT) && - (c == 'x' || c == 'X') && - pbuf[0] == '0' && pbuf[1] == c) { - *res++ = *pbuf++; - *res++ = *pbuf++; - rescnt -= 2; - len -= 2; - width -= 2; - if (width < 0) - width = 0; - } + if (PyLong_Check(v) && PyLong_AsLong(v) == -1 + && PyErr_Occurred()) { + /* Too big for a C long. */ + PyErr_Clear(); + temp = _PyString_FormatLong(v, flags, + prec, c, &pbuf, &len); + if (!temp) + goto error; + /* unbounded ints can always produce + a sign character! 
*/ + sign = 1; + } + else { + pbuf = formatbuf; + len = formatint(pbuf, sizeof(formatbuf), + flags, prec, c, v); + if (len < 0) + goto error; + /* only d conversion is signed */ + sign = c == 'd'; } + if (flags & F_ZERO) + fill = '0'; break; case 'e': case 'E': @@ -2782,7 +2935,7 @@ PyString_Format(PyObject *format, PyObject *args) if (len < 0) goto error; sign = 1; - if (flags&F_ZERO) + if (flags & F_ZERO) fill = '0'; break; case 'c': @@ -2807,11 +2960,11 @@ PyString_Format(PyObject *format, PyObject *args) else if (flags & F_BLANK) sign = ' '; else - sign = '\0'; + sign = 0; } if (width < len) width = len; - if (rescnt < width + (sign != '\0')) { + if (rescnt < width + (sign != 0)) { reslen -= rescnt; rescnt = width + fmtcnt + 100; reslen += rescnt; @@ -2827,14 +2980,36 @@ PyString_Format(PyObject *format, PyObject *args) if (width > len) width--; } - if (width > len && !(flags&F_LJUST)) { + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + if (fill != ' ') { + *res++ = *pbuf++; + *res++ = *pbuf++; + } + rescnt -= 2; + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } + if (width > len && !(flags & F_LJUST)) { do { --rescnt; *res++ = fill; } while (--width > len); } - if (sign && fill == ' ') - *res++ = sign; + if (fill == ' ') { + if (sign) + *res++ = sign; + if ((flags & F_ALT) && + (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + *res++ = *pbuf++; + *res++ = *pbuf++; + } + } memcpy(res, pbuf, len); res += len; rescnt -= len; diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 76bb92a..1559542 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4668,6 +4668,25 @@ formatfloat(Py_UNICODE *buf, return usprintf(buf, fmt, x); } +static PyObject* +formatlong(PyObject *val, int flags, int prec, int type) +{ + char *buf; + int i, len; + PyObject *str; /* temporary string object. 
*/ + PyUnicodeObject *result; + + str = _PyString_FormatLong(val, flags, prec, type, &buf, &len); + if (!str) + return NULL; + result = _PyUnicode_New(len); + for (i = 0; i < len; i++) + result->str[i] = buf[i]; + result->str[len] = 0; + Py_DECREF(str); + return (PyObject*)result; +} + static int formatint(Py_UNICODE *buf, size_t buflen, @@ -4677,8 +4696,9 @@ formatint(Py_UNICODE *buf, PyObject *v) { /* fmt = '%#.' + `prec` + 'l' + `type` - worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/ - char fmt[20]; + worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) + + 1 + 1 = 24*/ + char fmt[64]; /* plenty big enough! */ long x; x = PyInt_AsLong(v); @@ -5006,26 +5026,29 @@ PyObject *PyUnicode_Format(PyObject *format, case 'X': if (c == 'i') c = 'd'; - pbuf = formatbuf; - len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), - flags, prec, c, v); - if (len < 0) - goto onError; - sign = (c == 'd'); - if (flags & F_ZERO) { - fill = '0'; - if ((flags&F_ALT) && - (c == 'x' || c == 'X') && - pbuf[0] == '0' && pbuf[1] == c) { - *res++ = *pbuf++; - *res++ = *pbuf++; - rescnt -= 2; - len -= 2; - width -= 2; - if (width < 0) - width = 0; - } + if (PyLong_Check(v) && PyLong_AsLong(v) == -1 + && PyErr_Occurred()) { + PyErr_Clear(); + temp = formatlong(v, flags, prec, c); + if (!temp) + goto onError; + pbuf = PyUnicode_AS_UNICODE(temp); + len = PyUnicode_GET_SIZE(temp); + /* unbounded ints can always produce + a sign character! 
*/ + sign = 1; } + else { + pbuf = formatbuf; + len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), + flags, prec, c, v); + if (len < 0) + goto onError; + /* only d conversion is signed */ + sign = c == 'd'; + } + if (flags & F_ZERO) + fill = '0'; break; case 'e': @@ -5039,7 +5062,7 @@ PyObject *PyUnicode_Format(PyObject *format, if (len < 0) goto onError; sign = 1; - if (flags&F_ZERO) + if (flags & F_ZERO) fill = '0'; break; @@ -5086,14 +5109,35 @@ PyObject *PyUnicode_Format(PyObject *format, if (width > len) width--; } + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + if (fill != ' ') { + *res++ = *pbuf++; + *res++ = *pbuf++; + } + rescnt -= 2; + width -= 2; + if (width < 0) + width = 0; + len -= 2; + } if (width > len && !(flags & F_LJUST)) { do { --rescnt; *res++ = fill; } while (--width > len); } - if (sign && fill == ' ') - *res++ = sign; + if (fill == ' ') { + if (sign) + *res++ = sign; + if ((flags & F_ALT) && (c == 'x' || c == 'X')) { + assert(pbuf[0] == '0'); + assert(pbuf[1] == c); + *res++ = *pbuf++; + *res++ = *pbuf++; + } + } memcpy(res, pbuf, len * sizeof(Py_UNICODE)); res += len; rescnt -= len; -- cgit v0.12