summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Tim Peters <tim.peters@gmail.com> 2000-09-21 05:43:11 (GMT)
committer: Tim Peters <tim.peters@gmail.com> 2000-09-21 05:43:11 (GMT)
commit: 38fd5b641366eedc74e4be3a0e4d2210f3bcdb5a (patch)
tree: 38536cf33e6f83fa3ca8af62dbafebcd4dfd5921 /Objects
parent: 31575ce8172d40575be3c3d7a3a4a51d4aaf1a86 (diff)
download: cpython-38fd5b641366eedc74e4be3a0e4d2210f3bcdb5a.zip
cpython-38fd5b641366eedc74e4be3a0e4d2210f3bcdb5a.tar.gz
cpython-38fd5b641366eedc74e4be3a0e4d2210f3bcdb5a.tar.bz2
Derived from Martin's SF patch 110609: support unbounded ints in %d,i,u,x,X,o formats.
Note a curious extension to the std C rules: x, X and o formatting can never produce a sign character in C, so the '+' and ' ' flags are meaningless for them. But unbounded ints *can* produce a sign character under these conversions (no fixed-width bitstring is wide enough to hold all negative values in 2's-comp form). So these flags become meaningful in Python when formatting a Python long which is too big to fit in a C long. This required shuffling around existing code, which hacked x and X conversions to death when both the '#' and '0' flags were specified: the hacks weren't strong enough to deal with the simultaneous possibility of the ' ' or '+' flags too, since signs were always meaningless before for x and X conversions. Isomorphic shuffling was required in unicodeobject.c. Also added dozens of non-trivial new unbounded-int test cases to test_format.py.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/stringobject.c 231
-rw-r--r-- Objects/unicodeobject.c 92
2 files changed, 271 insertions, 52 deletions
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index cadca16..acae880 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -2427,6 +2427,13 @@ getnextarg(PyObject *args, int arglen, int *p_argidx)
return NULL;
}
+/* Format codes
+ * F_LJUST '-'
+ * F_SIGN '+'
+ * F_BLANK ' '
+ * F_ALT '#'
+ * F_ZERO '0'
+ */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
@@ -2464,22 +2471,164 @@ formatfloat(char *buf, size_t buflen, int flags,
return strlen(buf);
}
+/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
+ * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
+ * Python's regular ints.
+ * Return value:  a new PyString*, or NULL if error.
+ *  .  *pbuf is set to point into it,
+ *     *plen set to the # of chars following that.
+ *     Caller must decref it when done using pbuf.
+ *     The string starting at *pbuf is of the form
+ *         "-"? ("0x" | "0X")? digit+
+ *     "0x"/"0X" are present only for x and X conversions, with F_ALT
+ *         set in flags.  The case of hex digits will be correct.
+ *     There will be at least prec digits, zero-filled on the left if
+ *         necessary to get that many.
+ * val      object to be converted
+ * flags    bitmask of format flags; only F_ALT is looked at
+ * prec     minimum number of digits; 0-fill on left if needed
+ * type     a character in [duoxX]; u acts the same as d
+ *
+ * CAUTION:  o, x and X conversions on regular ints can never
+ * produce a '-' sign, but can for Python's unbounded ints.
+ *
+ * On every error path taken after the intermediate string has been
+ * created, that string is released before NULL is returned.
+ */
+PyObject*
+_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
+                     char **pbuf, int *plen)
+{
+    PyObject *result = NULL;
+    char *buf;
+    int i;
+    int sign;           /* 1 if '-', else 0 */
+    int len;            /* number of characters */
+    int numdigits;      /* len == numnondigits + numdigits */
+    int numnondigits = 0;
+
+    switch (type) {
+    case 'd':
+    case 'u':
+        result = val->ob_type->tp_str(val);
+        break;
+    case 'o':
+        result = val->ob_type->tp_as_number->nb_oct(val);
+        break;
+    case 'x':
+    case 'X':
+        /* Account for the "0x" marker in the hex conversion's output. */
+        numnondigits = 2;
+        result = val->ob_type->tp_as_number->nb_hex(val);
+        break;
+    default:
+        assert(!"'type' not in [duoxX]");
+        /* With assertions compiled out, don't return NULL without
+           an exception set. */
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    if (!result)
+        return NULL;
+
+    /* To modify the string in-place, there can only be one reference. */
+    if (result->ob_refcnt != 1) {
+        /* Drop our reference too; otherwise it is leaked. */
+        Py_DECREF(result);
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    buf = PyString_AsString(result);
+    if (!buf) {
+        /* The conversion slot did not hand back a string. */
+        Py_DECREF(result);
+        return NULL;
+    }
+    len = PyString_Size(result);
+    /* Strip the trailing 'L' marker, if present. */
+    if (len > 0 && buf[len-1] == 'L') {
+        --len;
+        buf[len] = '\0';
+    }
+    sign = buf[0] == '-';
+    numnondigits += sign;
+    numdigits = len - numnondigits;
+    assert(numdigits > 0);
+
+    /* Get rid of base marker unless F_ALT */
+    if ((flags & F_ALT) == 0) {
+        /* Need to skip 0x, 0X or 0. */
+        int skipped = 0;
+        switch (type) {
+        case 'o':
+            assert(buf[sign] == '0');
+            /* If 0 is only digit, leave it alone. */
+            if (numdigits > 1) {
+                skipped = 1;
+                --numdigits;
+            }
+            break;
+        case 'x':
+        case 'X':
+            assert(buf[sign] == '0');
+            assert(buf[sign + 1] == 'x');
+            skipped = 2;
+            numnondigits -= 2;
+            break;
+        }
+        if (skipped) {
+            /* Advance past the marker, then re-plant the sign (if any)
+               at the new start of the buffer. */
+            buf += skipped;
+            len -= skipped;
+            if (sign)
+                buf[0] = '-';
+        }
+        assert(len == numnondigits + numdigits);
+        assert(numdigits > 0);
+    }
+
+    /* Fill with leading zeroes to meet minimum width. */
+    if (prec > numdigits) {
+        PyObject *r1 = PyString_FromStringAndSize(NULL,
+                                numnondigits + prec);
+        char *b1;
+        if (!r1) {
+            Py_DECREF(result);
+            return NULL;
+        }
+        b1 = PyString_AS_STRING(r1);
+        /* Copy sign/base marker, then the zero padding, then digits. */
+        for (i = 0; i < numnondigits; ++i)
+            *b1++ = *buf++;
+        for (i = 0; i < prec - numdigits; i++)
+            *b1++ = '0';
+        for (i = 0; i < numdigits; i++)
+            *b1++ = *buf++;
+        *b1 = '\0';
+        Py_DECREF(result);
+        result = r1;
+        buf = PyString_AS_STRING(result);
+        len = numnondigits + prec;
+    }
+
+    /* Fix up case for hex conversions. */
+    switch (type) {
+    case 'x':
+        /* Need to convert all upper case letters to lower case. */
+        for (i = 0; i < len; i++)
+            if (buf[i] >= 'A' && buf[i] <= 'F')
+                buf[i] += 'a'-'A';
+        break;
+    case 'X':
+        /* Need to convert 0x to 0X (and -0x to -0X). */
+        if (buf[sign + 1] == 'x')
+            buf[sign + 1] = 'X';
+        break;
+    }
+    *pbuf = buf;
+    *plen = len;
+    return result;
+}
+
static int
formatint(char *buf, size_t buflen, int flags,
int prec, int type, PyObject *v)
{
/* fmt = '%#.' + `prec` + 'l' + `type`
- worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
- char fmt[20];
+ worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
+ + 1 + 1 = 24 */
+ char fmt[64]; /* plenty big enough! */
long x;
if (!PyArg_Parse(v, "l;int argument required", &x))
return -1;
if (prec < 0)
prec = 1;
sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
- /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
+ /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
- if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
+ if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
PyErr_SetString(PyExc_OverflowError,
"formatted integer is too long (precision too long?)");
return -1;
@@ -2752,25 +2901,29 @@ PyString_Format(PyObject *format, PyObject *args)
case 'X':
if (c == 'i')
c = 'd';
- pbuf = formatbuf;
- len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
- if (len < 0)
- goto error;
- sign = (c == 'd');
- if (flags&F_ZERO) {
- fill = '0';
- if ((flags&F_ALT) &&
- (c == 'x' || c == 'X') &&
- pbuf[0] == '0' && pbuf[1] == c) {
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- rescnt -= 2;
- len -= 2;
- width -= 2;
- if (width < 0)
- width = 0;
- }
+ if (PyLong_Check(v) && PyLong_AsLong(v) == -1
+ && PyErr_Occurred()) {
+ /* Too big for a C long. */
+ PyErr_Clear();
+ temp = _PyString_FormatLong(v, flags,
+ prec, c, &pbuf, &len);
+ if (!temp)
+ goto error;
+ /* unbounded ints can always produce
+ a sign character! */
+ sign = 1;
+ }
+ else {
+ pbuf = formatbuf;
+ len = formatint(pbuf, sizeof(formatbuf),
+ flags, prec, c, v);
+ if (len < 0)
+ goto error;
+ /* only d conversion is signed */
+ sign = c == 'd';
}
+ if (flags & F_ZERO)
+ fill = '0';
break;
case 'e':
case 'E':
@@ -2782,7 +2935,7 @@ PyString_Format(PyObject *format, PyObject *args)
if (len < 0)
goto error;
sign = 1;
- if (flags&F_ZERO)
+ if (flags & F_ZERO)
fill = '0';
break;
case 'c':
@@ -2807,11 +2960,11 @@ PyString_Format(PyObject *format, PyObject *args)
else if (flags & F_BLANK)
sign = ' ';
else
- sign = '\0';
+ sign = 0;
}
if (width < len)
width = len;
- if (rescnt < width + (sign != '\0')) {
+ if (rescnt < width + (sign != 0)) {
reslen -= rescnt;
rescnt = width + fmtcnt + 100;
reslen += rescnt;
@@ -2827,14 +2980,36 @@ PyString_Format(PyObject *format, PyObject *args)
if (width > len)
width--;
}
- if (width > len && !(flags&F_LJUST)) {
+ if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ if (fill != ' ') {
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ rescnt -= 2;
+ width -= 2;
+ if (width < 0)
+ width = 0;
+ len -= 2;
+ }
+ if (width > len && !(flags & F_LJUST)) {
do {
--rescnt;
*res++ = fill;
} while (--width > len);
}
- if (sign && fill == ' ')
- *res++ = sign;
+ if (fill == ' ') {
+ if (sign)
+ *res++ = sign;
+ if ((flags & F_ALT) &&
+ (c == 'x' || c == 'X')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ }
memcpy(res, pbuf, len);
res += len;
rescnt -= len;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 76bb92a..1559542 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4668,6 +4668,25 @@ formatfloat(Py_UNICODE *buf,
return usprintf(buf, fmt, x);
}
+/* Format a Python long for the Unicode % operator.
+ * Delegates the conversion to _PyString_FormatLong, then copies each
+ * char of that output into a freshly allocated Unicode object.
+ * val, flags, prec and type are passed through unchanged.
+ * Return value:  a new Unicode object, or NULL if the conversion or
+ * the allocation fails.
+ */
+static PyObject*
+formatlong(PyObject *val, int flags, int prec, int type)
+{
+    char *buf;
+    int i, len;
+    PyObject *str; /* temporary string object. */
+    PyUnicodeObject *result;
+
+    str = _PyString_FormatLong(val, flags, prec, type, &buf, &len);
+    if (!str)
+        return NULL;
+    result = _PyUnicode_New(len);
+    if (!result) {
+        /* Allocation failed: release the temporary instead of
+           dereferencing a NULL result and leaking str. */
+        Py_DECREF(str);
+        return NULL;
+    }
+    for (i = 0; i < len; i++)
+        result->str[i] = buf[i];
+    result->str[len] = 0;
+    /* buf points into str, so str is released only after the copy. */
+    Py_DECREF(str);
+    return (PyObject*)result;
+}
+
static int
formatint(Py_UNICODE *buf,
size_t buflen,
@@ -4677,8 +4696,9 @@ formatint(Py_UNICODE *buf,
PyObject *v)
{
/* fmt = '%#.' + `prec` + 'l' + `type`
- worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
- char fmt[20];
+ worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
+ + 1 + 1 = 24*/
+ char fmt[64]; /* plenty big enough! */
long x;
x = PyInt_AsLong(v);
@@ -5006,26 +5026,29 @@ PyObject *PyUnicode_Format(PyObject *format,
case 'X':
if (c == 'i')
c = 'd';
- pbuf = formatbuf;
- len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
- flags, prec, c, v);
- if (len < 0)
- goto onError;
- sign = (c == 'd');
- if (flags & F_ZERO) {
- fill = '0';
- if ((flags&F_ALT) &&
- (c == 'x' || c == 'X') &&
- pbuf[0] == '0' && pbuf[1] == c) {
- *res++ = *pbuf++;
- *res++ = *pbuf++;
- rescnt -= 2;
- len -= 2;
- width -= 2;
- if (width < 0)
- width = 0;
- }
+ if (PyLong_Check(v) && PyLong_AsLong(v) == -1
+ && PyErr_Occurred()) {
+ PyErr_Clear();
+ temp = formatlong(v, flags, prec, c);
+ if (!temp)
+ goto onError;
+ pbuf = PyUnicode_AS_UNICODE(temp);
+ len = PyUnicode_GET_SIZE(temp);
+ /* unbounded ints can always produce
+ a sign character! */
+ sign = 1;
}
+ else {
+ pbuf = formatbuf;
+ len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
+ flags, prec, c, v);
+ if (len < 0)
+ goto onError;
+ /* only d conversion is signed */
+ sign = c == 'd';
+ }
+ if (flags & F_ZERO)
+ fill = '0';
break;
case 'e':
@@ -5039,7 +5062,7 @@ PyObject *PyUnicode_Format(PyObject *format,
if (len < 0)
goto onError;
sign = 1;
- if (flags&F_ZERO)
+ if (flags & F_ZERO)
fill = '0';
break;
@@ -5086,14 +5109,35 @@ PyObject *PyUnicode_Format(PyObject *format,
if (width > len)
width--;
}
+ if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ if (fill != ' ') {
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ rescnt -= 2;
+ width -= 2;
+ if (width < 0)
+ width = 0;
+ len -= 2;
+ }
if (width > len && !(flags & F_LJUST)) {
do {
--rescnt;
*res++ = fill;
} while (--width > len);
}
- if (sign && fill == ' ')
- *res++ = sign;
+ if (fill == ' ') {
+ if (sign)
+ *res++ = sign;
+ if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
+ assert(pbuf[0] == '0');
+ assert(pbuf[1] == c);
+ *res++ = *pbuf++;
+ *res++ = *pbuf++;
+ }
+ }
memcpy(res, pbuf, len * sizeof(Py_UNICODE));
res += len;
rescnt -= len;