summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
Diffstat (limited to 'Objects')
-rw-r--r--Objects/floatobject.c19
-rw-r--r--Objects/longobject.c13
-rw-r--r--Objects/stringlib/formatter.h966
-rw-r--r--Objects/stringlib/string_format.h831
-rw-r--r--Objects/stringlib/stringdefs.h23
-rw-r--r--Objects/stringlib/unicodedefs.h32
-rw-r--r--Objects/typeobject.c41
-rw-r--r--Objects/unicodeobject.c246
8 files changed, 2156 insertions, 15 deletions
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 09efa12..ca94750 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -6,6 +6,8 @@
#include "Python.h"
+#include "formatter_unicode.h"
+
#include <ctype.h>
#if !defined(__STDC__)
@@ -1015,6 +1017,21 @@ float_getzero(PyObject *v, void *closure)
return PyFloat_FromDouble(0.0);
}
+static PyObject *
+float__format__(PyObject *self, PyObject *args)
+{
+ /* float.__format__: forwards self and the argument tuple unchanged
+ to unicode_float__format__ and returns whatever it returns.
+ when back porting this to 2.6, check type of the format_spec
+ and call either unicode_float__format__ or its 8-bit string
+ counterpart */
+ return unicode_float__format__(self, args);
+}
+
+PyDoc_STRVAR(float__format__doc,
+"float.__format__(format_spec) -> string\n"
+"\n"
+"Formats the float according to format_spec.");
+
+
static PyMethodDef float_methods[] = {
{"conjugate", (PyCFunction)float_float, METH_NOARGS,
"Returns self, the complex conjugate of any float."},
@@ -1028,6 +1045,8 @@ static PyMethodDef float_methods[] = {
METH_O|METH_CLASS, float_getformat_doc},
{"__setformat__", (PyCFunction)float_setformat,
METH_VARARGS|METH_CLASS, float_setformat_doc},
+ {"__format__", (PyCFunction)float__format__,
+ METH_VARARGS, float__format__doc},
{NULL, NULL} /* sentinel */
};
diff --git a/Objects/longobject.c b/Objects/longobject.c
index ddf359d..b724edf 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -5,6 +5,8 @@
#include "Python.h"
#include "longintrepr.h"
+#include "formatter_unicode.h"
+
#include <ctype.h>
long
@@ -3593,6 +3595,16 @@ long_getN(PyLongObject *v, void *context) {
}
static PyObject *
+long__format__(PyObject *self, PyObject *args)
+{
+ /* the "__format__" method of long objects: forwards self and the
+ argument tuple unchanged to unicode_long__format__ and returns
+ whatever it returns.
+ when back porting this to 2.6, check type of the format_spec
+ and call either unicode_long__format__ or
+ string_long__format__ */
+ return unicode_long__format__(self, args);
+}
+
+
+static PyObject *
long_round(PyObject *self, PyObject *args)
{
#define UNDEF_NDIGITS (-0x7fffffff) /* Unlikely ndigits value */
@@ -3632,6 +3644,7 @@ static PyMethodDef long_methods[] = {
"Rounding an Integral returns itself.\n"
"Rounding with an ndigits arguments defers to float.__round__."},
{"__getnewargs__", (PyCFunction)long_getnewargs, METH_NOARGS},
+ {"__format__", (PyCFunction)long__format__, METH_VARARGS},
{NULL, NULL} /* sentinel */
};
diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h
new file mode 100644
index 0000000..b65244d
--- /dev/null
+++ b/Objects/stringlib/formatter.h
@@ -0,0 +1,966 @@
+/* implements the string, long, and float formatters. that is,
+ string.__format__, etc. */
+
+/* Before including this, you must include either:
+ stringlib/unicodedefs.h
+ stringlib/stringdefs.h
+
+ Also, you should define the names:
+ FORMAT_STRING
+ FORMAT_LONG
+ FORMAT_FLOAT
+ to be whatever you want the public names of these functions to
+ be. These are the only non-static functions defined here.
+*/
+
+/*
+    get_integer consumes 0 or more decimal digit characters from an
+    input string, updates *result with the corresponding positive
+    integer, and returns the number of digits consumed.
+
+    *ptr is advanced past every digit consumed.  scanning stops at end,
+    or at the first character for which STRINGLIB_TODECIMAL returns a
+    negative value.
+
+    returns -1 on error (the value does not fit in a Py_ssize_t) with
+    ValueError set.  *result is not written in that case.
+*/
+static int
+get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
+            Py_ssize_t *result)
+{
+    Py_ssize_t accumulator = 0;
+    Py_ssize_t digitval;
+    int numdigits = 0;
+
+    for (; *ptr < end; (*ptr)++, numdigits++) {
+        digitval = STRINGLIB_TODECIMAL(**ptr);
+        if (digitval < 0)
+            break;
+        /*
+           Detect overflow before it happens.  Signed overflow is
+           undefined behaviour in C, so multiplying first and
+           inspecting the result afterwards is not reliable.
+           accumulator*10 + digitval fits in a Py_ssize_t exactly when
+           accumulator <= (PY_SSIZE_T_MAX - digitval) / 10.
+        */
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
+            PyErr_Format(PyExc_ValueError,
+                         "Too many decimal digits in format string");
+            return -1;
+        }
+        accumulator = accumulator * 10 + digitval;
+    }
+    *result = accumulator;
+    return numdigits;
+}
+
+/************************************************************************/
+/*********** standard format specifier parsing **************************/
+/************************************************************************/
+
+/* returns 1 if c is one of the alignment tokens '<', '>', '=' or '^',
+   and 0 for any other character */
+Py_LOCAL_INLINE(int)
+is_alignment_token(STRINGLIB_CHAR c)
+{
+    return c == '<' || c == '>' || c == '=' || c == '^';
+}
+
+/* returns 1 if c is one of the sign elements ' ', '+', '-' or '(',
+   and 0 for any other character */
+Py_LOCAL_INLINE(int)
+is_sign_element(STRINGLIB_CHAR c)
+{
+    return c == ' ' || c == '+' || c == '-' || c == '(';
+}
+
+
+typedef struct { /* parsed form of a format specifier, as filled in by
+ parse_internal_render_format_spec() */
+ STRINGLIB_CHAR fill_char; /* padding character, '\0' if none was given */
+ STRINGLIB_CHAR align; /* '<', '>', '=' or '^', '\0' if none was given */
+ STRINGLIB_CHAR sign; /* ' ', '+', '-' or '(', '\0' if none was given */
+ Py_ssize_t width; /* minimum field width, -1 if none was given */
+ Py_ssize_t precision; /* precision, -1 if none was given */
+ STRINGLIB_CHAR type; /* type code, the caller's default if none was given */
+} InternalFormatSpec;
+
+/*
+    parses a standard format specifier of the form
+        [[fill]align][sign][0][width][.precision][type]
+
+    format_spec is the string object holding the specifier.
+    default_type is the type code stored in format->type when the
+    specifier does not name one.
+    fills in format with the parsed information; fields that are not
+    present are left as '\0' (fill_char, align, sign) or -1 (width,
+    precision).
+    returns 1 on success, 0 on failure.
+    if failure, sets the exception
+*/
+static int
+parse_internal_render_format_spec(PyObject *format_spec,
+                                  InternalFormatSpec *format,
+                                  char default_type)
+{
+    STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec);
+    STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec);
+
+    /* end-ptr is used throughout this code to specify the length of
+       the input string */
+
+    /* number of digits consumed by get_integer(), or -1 on error */
+    Py_ssize_t n_consumed;
+
+    format->fill_char = '\0';
+    format->align = '\0';
+    format->sign = '\0';
+    format->width = -1;
+    format->precision = -1;
+    format->type = default_type;
+
+    /* If the second char is an alignment token,
+       then parse the fill char */
+    if (end-ptr >= 2 && is_alignment_token(ptr[1])) {
+        format->align = ptr[1];
+        format->fill_char = ptr[0];
+        ptr += 2;
+    } else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
+        format->align = ptr[0];
+        ptr++;
+    }
+
+    /* Parse the various sign options */
+    if (end-ptr >= 1 && is_sign_element(ptr[0])) {
+        format->sign = ptr[0];
+        ptr++;
+        /* a ')' directly after the sign element is consumed and
+           otherwise ignored */
+        if (end-ptr >= 1 && ptr[0] == ')') {
+            ptr++;
+        }
+    }
+
+    /* The special case for 0-padding (backwards compat) */
+    if (format->fill_char == '\0' &&
+        end-ptr >= 1 && ptr[0] == '0') {
+        format->fill_char = '0';
+        if (format->align == '\0') {
+            format->align = '=';
+        }
+        ptr++;
+    }
+
+    /* Parse field width.  get_integer() returns -1 with the exception
+       already set when the number is too large, so give up right away
+       rather than carrying on with a pending exception */
+    n_consumed = get_integer(&ptr, end, &format->width);
+    if (n_consumed < 0)
+        return 0;
+
+    /* if n_consumed is 0, we didn't consume any characters for
+       the width. in that case, reset the width to -1, because
+       get_integer() will have set it to zero */
+    if (n_consumed == 0) {
+        format->width = -1;
+    }
+
+    /* Parse field precision */
+    if (end-ptr && ptr[0] == '.') {
+        ptr++;
+
+        n_consumed = get_integer(&ptr, end, &format->precision);
+        if (n_consumed < 0)
+            return 0;
+
+        /* not having a precision after a dot is an error */
+        if (n_consumed == 0) {
+            PyErr_Format(PyExc_ValueError,
+                         "Format specifier missing precision");
+            return 0;
+        }
+
+    }
+
+    /* Finally, parse the type field */
+
+    if (end-ptr > 1) {
+        /* invalid conversion spec */
+        PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
+        return 0;
+    }
+
+    if (end-ptr == 1) {
+        format->type = ptr[0];
+    }
+
+    return 1;
+}
+
+
+/************************************************************************/
+/*********** common routines for numeric formatting *********************/
+/************************************************************************/
+
+/* describes the layout for a number, see the comment in
+ calc_number_widths() for details. the output looks like
+ <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> */
+typedef struct {
+ Py_ssize_t n_lpadding; /* fill characters before the left sign */
+ Py_ssize_t n_spadding; /* fill characters between left sign and digits */
+ Py_ssize_t n_rpadding; /* fill characters after the right sign */
+ char lsign; /* left sign character, '\0' if none */
+ Py_ssize_t n_lsign; /* 0 or 1 */
+ char rsign; /* right sign character, '\0' if none */
+ Py_ssize_t n_rsign; /* 0 or 1 */
+ Py_ssize_t n_total; /* just a convenience, it's derivable from the
+ other fields */
+} NumberFieldWidths;
+
+/* fills in *r with the sign characters and the padding counts needed
+ to lay out a number that has n_digits characters and the sign
+ actual_sign ('-' means negative), using format->sign, format->width
+ and format->align. r->n_total receives the total output length.
+
+ not all fields of format are used. for example, precision is
+ unused. should this take discrete params in order to be more clear
+ about what it does? or is passing a single format parameter easier
+ and more efficient enough to justify a little obfuscation? */
+static void
+calc_number_widths(NumberFieldWidths *r, STRINGLIB_CHAR actual_sign,
+ Py_ssize_t n_digits, const InternalFormatSpec *format)
+{
+ r->n_lpadding = 0;
+ r->n_spadding = 0;
+ r->n_rpadding = 0;
+ r->lsign = '\0';
+ r->n_lsign = 0;
+ r->rsign = '\0';
+ r->n_rsign = 0;
+
+ /* the output will look like:
+ | |
+ | <lpadding> <lsign> <spadding> <digits> <rsign> <rpadding> |
+ | |
+
+ lsign and rsign are computed from format->sign and the actual
+ sign of the number
+
+ digits is already known
+
+ the total width is either given, or computed from the
+ actual digits
+
+ spadding is never combined with lpadding or rpadding; '^'
+ alignment splits the padding between lpadding and rpadding
+ */
+
+ /* compute the various parts we're going to write */
+ if (format->sign == '+') {
+ /* always put a + or - */
+ r->n_lsign = 1;
+ r->lsign = (actual_sign == '-' ? '-' : '+');
+ } else if (format->sign == '(') {
+ /* negative numbers are wrapped in parentheses, others get no sign */
+ if (actual_sign == '-') {
+ r->n_lsign = 1;
+ r->lsign = '(';
+ r->n_rsign = 1;
+ r->rsign = ')';
+ }
+ } else if (format->sign == ' ') {
+ /* always reserve a column: '-' if negative, otherwise a space */
+ r->n_lsign = 1;
+ r->lsign = (actual_sign == '-' ? '-' : ' ');
+ } else {
+ /* non specified, or the default (-) */
+ if (actual_sign == '-') {
+ r->n_lsign = 1;
+ r->lsign = '-';
+ }
+ }
+
+ /* now the number of padding characters */
+ if (format->width == -1) {
+ /* no padding at all, nothing to do */
+ } else {
+ /* see if any padding is needed */
+ if (r->n_lsign + n_digits + r->n_rsign >= format->width) {
+ /* no padding needed, we're already bigger than the
+ requested width */
+ } else {
+ /* determine which of left, space, or right padding is
+ needed */
+ Py_ssize_t padding = format->width - (r->n_lsign + n_digits + r->n_rsign);
+ if (format->align == '<')
+ r->n_rpadding = padding;
+ else if (format->align == '>')
+ r->n_lpadding = padding;
+ else if (format->align == '^') {
+ /* an odd leftover character goes on the right */
+ r->n_lpadding = padding / 2;
+ r->n_rpadding = padding - r->n_lpadding;
+ } else
+ /* '=' or no alignment given: pad between sign and digits */
+ r->n_spadding = padding;
+ }
+ }
+ r->n_total = r->n_lpadding + r->n_lsign + r->n_spadding +
+ n_digits + r->n_rsign + r->n_rpadding;
+}
+
+/* fill in the non-digit parts of a numbers's string representation,
+ as determined in calc_number_widths(). returns the pointer to
+ where the digits go.
+
+ p_buf is the start of the output buffer, which must have room for
+ all the padding, the signs and n_digits characters. the n_digits
+ slots for the digits are skipped over, not written. fill_char is
+ the character used for all three kinds of padding. */
+static STRINGLIB_CHAR *
+fill_number(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
+ Py_ssize_t n_digits, STRINGLIB_CHAR fill_char)
+{
+ STRINGLIB_CHAR* p_digits;
+
+ if (spec->n_lpadding) {
+ STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
+ p_buf += spec->n_lpadding;
+ }
+ if (spec->n_lsign == 1) {
+ *p_buf++ = spec->lsign;
+ }
+ if (spec->n_spadding) {
+ STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
+ p_buf += spec->n_spadding;
+ }
+ p_digits = p_buf; /* remember where the digits start, then step over them */
+ p_buf += n_digits;
+ if (spec->n_rsign == 1) {
+ *p_buf++ = spec->rsign;
+ }
+ if (spec->n_rpadding) {
+ STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
+ p_buf += spec->n_rpadding;
+ }
+ return p_digits;
+}
+
+/************************************************************************/
+/*********** string formatting ******************************************/
+/************************************************************************/
+
+/* formats the string object value according to format: truncates it to
+ format->precision characters if a precision is given, then pads it
+ with the fill character to format->width. returns a new string
+ object, or NULL with ValueError set if a sign or '=' alignment was
+ requested, or NULL if the allocation fails. */
+static PyObject *
+format_string_internal(PyObject *value, const InternalFormatSpec *format)
+{
+ Py_ssize_t width; /* total field width */
+ Py_ssize_t lpad;
+ STRINGLIB_CHAR *dst;
+ STRINGLIB_CHAR *src = STRINGLIB_STR(value);
+ Py_ssize_t len = STRINGLIB_LEN(value);
+ PyObject *result = NULL;
+
+ /* sign is not allowed on strings */
+ if (format->sign != '\0') {
+ PyErr_SetString(PyExc_ValueError,
+ "Sign not allowed in string format specifier");
+ goto done;
+ }
+
+ /* '=' alignment not allowed on strings */
+ if (format->align == '=') {
+ PyErr_SetString(PyExc_ValueError,
+ "'=' alignment not allowed "
+ "in string format specifier");
+ goto done;
+ }
+
+ /* if precision is specified, output no more than format.precision
+ characters */
+ if (format->precision >= 0 && len >= format->precision) {
+ len = format->precision;
+ }
+
+ if (format->width >= 0) {
+ width = format->width;
+
+ /* but use at least len characters */
+ if (len > width) {
+ width = len;
+ }
+ } else {
+ /* not specified, use all of the chars and no more */
+ width = len;
+ }
+
+ /* allocate the resulting string */
+ result = STRINGLIB_NEW(NULL, width);
+ if (result == NULL)
+ goto done;
+
+ /* now write into that space */
+ dst = STRINGLIB_STR(result);
+
+ /* figure out how much leading space we need, based on the
+ aligning. anything other than '>' or '^' is left aligned */
+ if (format->align == '>')
+ lpad = width - len;
+ else if (format->align == '^')
+ lpad = (width - len) / 2;
+ else
+ lpad = 0;
+
+ /* copy the characters, offset by lpad to leave room for any
+ padding on the left */
+ memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
+
+ /* do any padding */
+ if (width > len) {
+ STRINGLIB_CHAR fill_char = format->fill_char;
+ if (fill_char == '\0') {
+ /* use the default, if not specified */
+ fill_char = ' ';
+ }
+
+ /* pad on left */
+ if (lpad)
+ STRINGLIB_FILL(dst, fill_char, lpad);
+
+ /* pad on right */
+ if (width - len - lpad)
+ STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
+ }
+
+done:
+ return result;
+}
+
+
+/************************************************************************/
+/*********** long formatting ********************************************/
+/************************************************************************/
+
+/* formats the integer object value according to format and returns a
+ new string object, or NULL with an exception set.
+
+ type 'c' produces a one character string from the integer's value.
+ every other type converts the integer with PyNumber_ToBase() and
+ then rearranges that string in place: the sign and base prefix that
+ PyNumber_ToBase() produced are dropped and replaced by the sign and
+ padding computed by calc_number_widths(). */
+static PyObject *
+format_long_internal(PyObject *value, const InternalFormatSpec *format)
+{
+ PyObject *result = NULL;
+ int total_leading_chars_to_skip = 0; /* also includes sign, if
+ present */
+ STRINGLIB_CHAR sign = '\0';
+ STRINGLIB_CHAR *p;
+ Py_ssize_t n_digits; /* count of digits need from the computed
+ string */
+ Py_ssize_t len;
+ Py_ssize_t tmp;
+ NumberFieldWidths spec;
+ long x;
+
+ /* no precision allowed on integers */
+ if (format->precision != -1) {
+ PyErr_SetString(PyExc_ValueError,
+ "Precision not allowed in integer format specifier");
+ goto done;
+ }
+
+
+ /* special case for character formatting */
+ if (format->type == 'c') {
+ /* error to specify a sign */
+ if (format->sign != '\0') {
+ PyErr_SetString(PyExc_ValueError,
+ "Sign not allowed with integer"
+ " format specifier 'c'");
+ goto done;
+ }
+
+ /* taken from unicodeobject.c formatchar() */
+ /* Integer input truncated to a character */
+ x = PyInt_AsLong(value);
+ if (x == -1 && PyErr_Occurred())
+ goto done;
+#ifdef Py_UNICODE_WIDE
+ if (x < 0 || x > 0x10ffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x110000) "
+ "(wide Python build)");
+ goto done;
+ }
+#else
+ if (x < 0 || x > 0xffff) {
+ PyErr_SetString(PyExc_OverflowError,
+ "%c arg not in range(0x10000) "
+ "(narrow Python build)");
+ goto done;
+ }
+#endif
+ result = STRINGLIB_NEW(NULL, 1);
+ if (result == NULL)
+ goto done;
+ p = STRINGLIB_STR(result);
+ /* NOTE(review): the cast is to Py_UNICODE rather than
+ STRINGLIB_CHAR, which presumably assumes the unicode
+ instantiation of this header -- confirm before using it
+ for 8-bit strings */
+ p[0] = (Py_UNICODE) x;
+ n_digits = len = 1;
+ } else {
+ int base;
+ int format_leading_chars_to_skip; /* characters added by
+ PyNumber_ToBase that we
+ want to skip over.
+ instead of using them,
+ we'll compute our
+ own. */
+ /* compute the base and how many characters will be added by
+ PyNumber_ToBase */
+ switch (format->type) {
+ case 'b':
+ base = 2;
+ format_leading_chars_to_skip = 2; /* 0b */
+ break;
+ case 'o':
+ base = 8;
+ format_leading_chars_to_skip = 2; /* 0o */
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ format_leading_chars_to_skip = 2; /* 0x */
+ break;
+ default: /* shouldn't be needed, but stops a compiler warning */
+ case 'd':
+ base = 10;
+ format_leading_chars_to_skip = 0;
+ break;
+ }
+
+ /* do the hard part, converting to a string in a given base */
+ result = PyNumber_ToBase(value, base);
+ if (result == NULL)
+ goto done;
+
+ n_digits = STRINGLIB_LEN(result);
+ len = n_digits;
+ p = STRINGLIB_STR(result);
+
+ /* if X, convert to uppercase. this also touches the sign and
+ prefix characters, but those are discarded below */
+ if (format->type == 'X')
+ for (tmp = 0; tmp < len; tmp++)
+ p[tmp] = STRINGLIB_TOUPPER(p[tmp]);
+
+ /* is a sign character present in the output? if so, remember it
+ and skip it */
+ sign = p[0];
+ if (sign == '-') {
+ total_leading_chars_to_skip += 1;
+ n_digits--;
+ }
+
+ /* skip over the leading digits (0x, 0b, etc.) */
+ assert(n_digits >= format_leading_chars_to_skip + 1);
+ n_digits -= format_leading_chars_to_skip;
+ total_leading_chars_to_skip += format_leading_chars_to_skip;
+ }
+
+ calc_number_widths(&spec, sign, n_digits, format);
+
+ /* if the buffer is getting bigger, realloc it. if it's getting
+ smaller, don't realloc because we need to move the results
+ around first. realloc after we've done that */
+
+ /* NOTE(review): when STRINGLIB_RESIZE fails, result is returned as
+ is. whether that leaves a valid object alongside the pending
+ exception depends on how STRINGLIB_RESIZE is defined -- confirm */
+ if (spec.n_total > len) {
+ if (STRINGLIB_RESIZE(&result, spec.n_total) < 0)
+ goto done;
+ /* recalc, because string might have moved */
+ p = STRINGLIB_STR(result);
+ }
+
+ /* copy the characters into position first, since we're going to
+ overwrite some of that space */
+ /* we need to move if the number of left padding in the output is
+ different from the number of characters we need to skip */
+ if ((spec.n_lpadding + spec.n_lsign + spec.n_spadding) !=
+ total_leading_chars_to_skip) {
+ memmove(p + (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
+ p + total_leading_chars_to_skip,
+ n_digits * sizeof(STRINGLIB_CHAR));
+ }
+
+ /* now fill in the non-digit parts */
+ fill_number(p, &spec, n_digits,
+ format->fill_char == '\0' ? ' ' : format->fill_char);
+
+ /* if we're getting smaller, realloc now */
+ if (spec.n_total < len) {
+ if (STRINGLIB_RESIZE(&result, spec.n_total) < 0)
+ goto done;
+ }
+
+done:
+ return result;
+}
+
+
+/************************************************************************/
+/*********** float formatting *******************************************/
+/************************************************************************/
+
+/* taken from unicodeobject.c */
+/* copies the NUL-terminated 8-bit string charbuffer into buffer, one
+ Py_UNICODE per char, and returns the number of characters copied.
+ no terminator is written, so buffer needs room for
+ strlen(charbuffer) elements. the copy runs from the last character
+ back to the first. */
+static Py_ssize_t
+strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+{
+ register Py_ssize_t i;
+ Py_ssize_t len = strlen(charbuffer);
+ for (i = len - 1; i >= 0; i--)
+ buffer[i] = (Py_UNICODE) charbuffer[i];
+
+ return len;
+}
+
+/* the callback function to call to do the actual float formatting.
+ it matches the definition of PyOS_ascii_formatd */
+typedef char*
+(*DoubleSnprintfFunction)(char *buffer, size_t buf_len,
+ const char *format, double d);
+
+/* adapter that gives PyOS_snprintf the DoubleSnprintfFunction
+   signature.  the formatted text is written to buffer; the return
+   value carries no information and is always NULL. */
+static char*
+snprintf_double(char *buffer, size_t buf_len, const char *format, double d)
+{
+    (void)PyOS_snprintf(buffer, buf_len, format, d);
+    return (char *)NULL;
+}
+
+/* see FORMATBUFLEN in unicodeobject.c */
+#define FLOAT_FORMATBUFLEN 120
+
+/* much of this is taken from unicodeobject.c */
+/* formats the float in value according to format and returns a new
+   string object, or NULL with an exception set.
+
+   use type instead of format->type, so that it can be overridden by
+   the caller (format_float_internal() does this for 'n').
+
+   snprintf is the callback that renders the double into an 8-bit
+   buffer; the result is then widened to unicode if needed, and the
+   sign and padding are laid out with calc_number_widths() and
+   fill_number().
+
+   raises OverflowError if the formatted text plus the trailing '%'
+   (for type '%') does not fit in the internal buffer. */
+static PyObject *
+_format_float(STRINGLIB_CHAR type, PyObject *value,
+              const InternalFormatSpec *format,
+              DoubleSnprintfFunction snprintf)
+{
+    /* fmt = '%.' + `prec` + `type`
+       prec is a Py_ssize_t, which can need up to 20 characters, so
+       worst case length = 2 + 20 + 1 + 1 (NUL) = 24 (use 32) */
+    char fmt[32];
+
+    /* taken from unicodeobject.c */
+    /* Worst case length calc to ensure no buffer overrun:
+
+       'g' formats:
+         fmt = %#.<prec>g
+         buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+            for any double rep.)
+         len = 1 + prec + 1 + 2 + 5 = 9 + prec
+
+       'f' formats:
+         buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
+         len = 1 + 50 + 1 + prec = 52 + prec
+
+       If prec=0 the effective precision is 1 (the leading digit is
+       always given), therefore increase the length by one.
+
+    */
+    char charbuf[FLOAT_FORMATBUFLEN];
+    Py_ssize_t n_digits;
+    double x;
+    Py_ssize_t precision = format->precision;
+    PyObject *result = NULL;
+    STRINGLIB_CHAR sign;
+    char* trailing = "";
+    STRINGLIB_CHAR *p;
+    NumberFieldWidths spec;
+
+#if STRINGLIB_IS_UNICODE
+    Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
+#endif
+
+    /* first, do the conversion as 8-bit chars, using the platform's
+       snprintf.  then, if needed, convert to unicode. */
+
+    /* 'F' is the same as 'f', per the PEP */
+    if (type == 'F')
+        type = 'f';
+
+    x = PyFloat_AsDouble(value);
+
+    if (x == -1.0 && PyErr_Occurred())
+        goto done;
+
+    /* percent formatting is 'f' formatting of 100*x with a '%' suffix */
+    if (type == '%') {
+        type = 'f';
+        x *= 100;
+        trailing = "%";
+    }
+
+    if (precision < 0)
+        precision = 6;
+    /* numbers of magnitude 1e50 or more switch from 'f' to 'g', which
+       keeps the integer part within the worst case computed above */
+    if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
+        type = 'g';
+
+    /* cast "type", because if we're in unicode we need to pass a
+       8-bit char.  this is safe, because we've restricted what "type"
+       can be */
+    PyOS_snprintf(fmt, sizeof(fmt), "%%.%zd%c", precision, (char)type);
+
+    /* call the passed in function to do the actual formatting */
+    snprintf(charbuf, sizeof(charbuf), fmt, x);
+
+    /* append the trailing text.  nothing above limits the precision,
+       so the formatter may have filled charbuf completely; check that
+       there is room before concatenating instead of writing past the
+       end of the buffer.
+       NOTE(review): output that was truncated by the formatter itself
+       is still passed through silently -- consider rejecting it too */
+    if (strlen(charbuf) + strlen(trailing) >= sizeof(charbuf)) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "formatted float is too long (precision too large?)");
+        goto done;
+    }
+    strcat(charbuf, trailing);
+
+    /* rather than duplicate the code for snprintf for both unicode
+       and 8 bit strings, we just use the 8 bit version and then
+       convert to unicode in a separate code path.  that's probably
+       the lesser of 2 evils. */
+#if STRINGLIB_IS_UNICODE
+    n_digits = strtounicode(unicodebuf, charbuf);
+    p = unicodebuf;
+#else
+    /* compute the length.  I believe this is done because the return
+       value from snprintf above is unreliable */
+    n_digits = strlen(charbuf);
+    p = charbuf;
+#endif
+
+    /* is a sign character present in the output?  if so, remember it
+       and skip it */
+    sign = p[0];
+    if (sign == '-') {
+        p++;
+        n_digits--;
+    }
+
+    calc_number_widths(&spec, sign, n_digits, format);
+
+    /* allocate a string with enough space */
+    result = STRINGLIB_NEW(NULL, spec.n_total);
+    if (result == NULL)
+        goto done;
+
+    /* fill in the non-digit parts */
+    fill_number(STRINGLIB_STR(result), &spec, n_digits,
+                format->fill_char == '\0' ? ' ' : format->fill_char);
+
+    /* fill in the digit parts */
+    memmove(STRINGLIB_STR(result) + (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
+            p,
+            n_digits * sizeof(STRINGLIB_CHAR));
+
+done:
+    return result;
+}
+
+/* formats the float object value according to format.  type 'n' is
+   rendered as 'f' through snprintf_double (PyOS_snprintf); every
+   other type is passed through unchanged and rendered with
+   PyOS_ascii_formatd. */
+static PyObject *
+format_float_internal(PyObject *value, const InternalFormatSpec *format)
+{
+    if (format->type != 'n')
+        return _format_float(format->type, value, format, PyOS_ascii_formatd);
+
+    return _format_float('f', value, format, snprintf_double);
+}
+
+/************************************************************************/
+/*********** built in formatters ****************************************/
+/************************************************************************/
+
+/* the string formatter; FORMAT_STRING is the public name chosen by the
+ including file. value is the string to format and args is the
+ argument tuple, which must hold exactly one string object, the
+ format_spec.
+
+ an empty format_spec returns STRINGLIB_TOSTR(value). otherwise the
+ spec is parsed with a default type of 's'; only type 's' is
+ accepted, since the conversions to integer and float are compiled
+ out with #if 0. returns NULL with an exception set on error. */
+PyObject *
+FORMAT_STRING(PyObject* value, PyObject* args)
+{
+ PyObject *format_spec;
+ PyObject *tmp = NULL;
+ PyObject *result = NULL;
+ InternalFormatSpec format;
+
+ if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
+ goto done;
+ if (!STRINGLIB_CHECK(format_spec)) {
+ PyErr_SetString(PyExc_TypeError, STRINGLIB_TYPE_NAME " object required");
+ goto done;
+ }
+
+ /* check for the special case of zero length format spec, make
+ it equivalent to str(value) */
+ if (STRINGLIB_LEN(format_spec) == 0) {
+ result = STRINGLIB_TOSTR(value);
+ goto done;
+ }
+
+ /* parse the format_spec */
+ if (!parse_internal_render_format_spec(format_spec, &format, 's'))
+ goto done;
+
+ /* type conversion? */
+ switch (format.type) {
+ case 's':
+ /* no type conversion needed, already a string. do the formatting */
+ result = format_string_internal(value, &format);
+ break;
+#if 0
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'o':
+ case 'x':
+ case 'X':
+ /* convert to integer */
+ /* XXX: make a stringlib function to do this when backporting,
+ since FromUnicode differs from FromString */
+ tmp = PyLong_FromUnicode(STRINGLIB_STR(value), STRINGLIB_LEN(value), 0);
+ if (tmp == NULL)
+ goto done;
+ result = format_long_internal(tmp, &format);
+ break;
+
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ case 'n':
+ case '%':
+ /* convert to float */
+ tmp = PyFloat_FromString(value);
+ if (tmp == NULL)
+ goto done;
+ result = format_float_internal(tmp, &format);
+ break;
+#endif
+ default:
+ /* unknown */
+ PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
+ format.type);
+ goto done;
+ }
+
+done:
+ /* tmp is only set by the disabled conversions above, so this is
+ currently a no-op */
+ Py_XDECREF(tmp);
+ return result;
+}
+
+/* the integer formatter; FORMAT_LONG is the public name chosen by the
+   including file.  value is the integer to format and args is the
+   argument tuple, which must hold exactly one string object, the
+   format_spec.
+
+   an empty format_spec returns STRINGLIB_TOSTR(value).  otherwise the
+   spec is parsed with a default type of 'd'.  the integer types are
+   formatted directly; the float types first convert value with
+   PyNumber_Float() and format the converted object.  returns NULL
+   with an exception set on error. */
+PyObject *
+FORMAT_LONG(PyObject* value, PyObject* args)
+{
+    PyObject *format_spec;
+    PyObject *result = NULL;
+    PyObject *tmp = NULL;
+    InternalFormatSpec format;
+
+    if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
+        goto done;
+    if (!STRINGLIB_CHECK(format_spec)) {
+        PyErr_SetString(PyExc_TypeError, STRINGLIB_TYPE_NAME " object required");
+        goto done;
+    }
+
+    /* check for the special case of zero length format spec, make
+       it equivalent to str(value) */
+    if (STRINGLIB_LEN(format_spec) == 0) {
+        result = STRINGLIB_TOSTR(value);
+        goto done;
+    }
+
+    /* parse the format_spec */
+    if (!parse_internal_render_format_spec(format_spec, &format, 'd'))
+        goto done;
+
+    /* type conversion? */
+    switch (format.type) {
+#if 0
+    case 's':
+        /* convert to string/unicode */
+        tmp = STRINGLIB_TOSTR(value);
+        if (tmp == NULL)
+            goto done;
+        result = format_string_internal(tmp, &format);
+        break;
+#endif
+    case 'b':
+    case 'c':
+    case 'd':
+    case 'o':
+    case 'x':
+    case 'X':
+        /* no type conversion needed, already an int.  do the formatting */
+        result = format_long_internal(value, &format);
+        break;
+
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+    case 'n':
+    case '%':
+        /* convert to float, and format the converted object rather
+           than the original integer */
+        tmp = PyNumber_Float(value);
+        if (tmp == NULL)
+            goto done;
+        result = format_float_internal(tmp, &format);
+        break;
+
+    default:
+        /* unknown */
+        PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
+                     format.type);
+        goto done;
+    }
+
+done:
+    /* release the temporary float, if one was created */
+    Py_XDECREF(tmp);
+    return result;
+}
+
+/* the float formatter; FORMAT_FLOAT is the public name chosen by the
+ including file. value is the float to format and args is the
+ argument tuple, which must hold exactly one string object, the
+ format_spec.
+
+ an empty format_spec returns STRINGLIB_TOSTR(value). otherwise the
+ spec is parsed with a default type of 'g'. the float types are
+ formatted directly; the integer types first convert value with
+ PyNumber_Long(). returns NULL with an exception set on error. */
+PyObject *
+FORMAT_FLOAT(PyObject *value, PyObject *args)
+{
+ PyObject *format_spec;
+ PyObject *result = NULL;
+ PyObject *tmp = NULL;
+ InternalFormatSpec format;
+
+ if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
+ goto done;
+ if (!STRINGLIB_CHECK(format_spec)) {
+ PyErr_SetString(PyExc_TypeError, STRINGLIB_TYPE_NAME " object required");
+ goto done;
+ }
+
+ /* check for the special case of zero length format spec, make
+ it equivalent to str(value) */
+ if (STRINGLIB_LEN(format_spec) == 0) {
+ result = STRINGLIB_TOSTR(value);
+ goto done;
+ }
+
+ /* parse the format_spec */
+ if (!parse_internal_render_format_spec(format_spec, &format, 'g'))
+ goto done;
+
+ /* type conversion? */
+ switch (format.type) {
+#if 0
+ case 's':
+ /* convert to string/unicode */
+ tmp = STRINGLIB_TOSTR(value);
+ if (tmp == NULL)
+ goto done;
+ result = format_string_internal(tmp, &format);
+ break;
+#endif
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'o':
+ case 'x':
+ case 'X':
+ /* convert to integer */
+ tmp = PyNumber_Long(value);
+ if (tmp == NULL)
+ goto done;
+ result = format_long_internal(tmp, &format);
+ break;
+
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ case 'n':
+ case '%':
+ /* no conversion, already a float. do the formatting */
+ result = format_float_internal(value, &format);
+ break;
+
+ default:
+ /* unknown */
+ PyErr_Format(PyExc_ValueError, "Unknown conversion type %c",
+ format.type);
+ goto done;
+ }
+
+done:
+ /* release the temporary integer, if one was created */
+ Py_XDECREF(tmp);
+ return result;
+}
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h
new file mode 100644
index 0000000..5803216
--- /dev/null
+++ b/Objects/stringlib/string_format.h
@@ -0,0 +1,831 @@
+/*
+ string_format.h -- implementation of string.format().
+
+ It uses the Objects/stringlib conventions, so that it can be
+ compiled for both unicode and string objects.
+*/
+
+
+/* Defines for more efficiently reallocating the string buffer */
+/* extra room (in characters) reserved beyond what is needed: added to
+ the initial buffer size and used as the starting size_increment */
+#define INITIAL_SIZE_INCREMENT 100
+/* factor the size_increment is multiplied by after each reallocation */
+#define SIZE_MULTIPLIER 2
+/* the size_increment stops growing once it is no longer below this */
+#define MAX_SIZE_INCREMENT 3200
+
+
+/************************************************************************/
+/*********** Global data structures and forward declarations *********/
+/************************************************************************/
+
+/*
+ A SubString consists of the characters between two string or
+ unicode pointers.
+*/
+typedef struct {
+ STRINGLIB_CHAR *ptr; /* current start; NULL if initialized from a NULL pointer */
+ STRINGLIB_CHAR *end; /* one past the last character (ptr + length) */
+} SubString;
+
+
+/* forward declaration for recursion */
+static PyObject *
+build_string(SubString *input, PyObject *args, PyObject *kwargs,
+ int *recursion_level);
+
+
+
+/************************************************************************/
+/************************** Utility functions ************************/
+/************************************************************************/
+
+/* Point a SubString at the len characters starting at p.  When p is
+   NULL both ptr and end are set to NULL, whatever len is. */
+Py_LOCAL_INLINE(void)
+SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
+{
+    str->ptr = p;
+    str->end = (p != NULL) ? p + len : NULL;
+}
+
+/* Create a string/unicode object from the characters between
+   str->ptr and str->end.  Returns whatever STRINGLIB_NEW returns. */
+Py_LOCAL_INLINE(PyObject *)
+SubString_new_object(SubString *str)
+{
+    Py_ssize_t length = str->end - str->ptr;
+
+    return STRINGLIB_NEW(str->ptr, length);
+}
+
+/************************************************************************/
+/*********** Error handling and exception generation **************/
+/************************************************************************/
+
+/*
+ Most of our errors are value errors, because to Python, the
+ format string is a "value". Also, it's convenient to return
+ a NULL when we are erroring out.
+
+ XXX: need better error handling, per PEP 3101.
+*/
+static void *
+SetError(const char *s)
+{
+    /* raise ValueError("<s> in format string"); PyErr_Format always
+       returns NULL, so the NULL handed back to the caller is spelled
+       out explicitly */
+    PyErr_Format(PyExc_ValueError, "%s in format string", s);
+    return NULL;
+}
+
+/*
+ check_input returns True if we still have characters
+ left in the input string.
+
+ XXX: make this function go away when better error handling is
+ implemented.
+*/
+Py_LOCAL_INLINE(int)
+check_input(SubString *input)
+{
+    /* input exhausted: report it as an unterminated field */
+    if (input->ptr >= input->end) {
+        PyErr_SetString(PyExc_ValueError,
+                        "unterminated replacement field");
+        return 0;
+    }
+    return 1;
+}
+
+/************************************************************************/
+/*********** Output string management functions ****************/
+/************************************************************************/
+
+/* growable output buffer backed by a string/unicode object */
+typedef struct {
+ STRINGLIB_CHAR *ptr; /* next position to write to inside obj's buffer */
+ STRINGLIB_CHAR *end; /* one past the end of obj's current buffer */
+ PyObject *obj; /* the string/unicode object being built */
+ Py_ssize_t size_increment; /* extra room requested on the next reallocation */
+} OutputString;
+
+/* Set up an OutputString on top of a freshly created string object of
+   size characters.  Returns 1 on success; returns 0 (leaving
+   output->obj NULL) if the object could not be created. */
+static int
+output_initialize(OutputString *output, Py_ssize_t size)
+{
+    output->obj = STRINGLIB_NEW(NULL, size);
+    if (!output->obj)
+        return 0;
+
+    output->size_increment = INITIAL_SIZE_INCREMENT;
+    output->ptr = STRINGLIB_STR(output->obj);
+    output->end = output->ptr + STRINGLIB_LEN(output->obj);
+    return 1;
+}
+
+/*
+ output_extend reallocates the output string buffer.
+ It returns a status: 0 for a failed reallocation,
+ 1 for success.
+*/
+
+static int
+output_extend(OutputString *output, Py_ssize_t count)
+{
+    /* characters written so far, and the size we are growing to */
+    Py_ssize_t used = output->ptr - STRINGLIB_STR(output->obj);
+    Py_ssize_t newsize = used + count + output->size_increment;
+    STRINGLIB_CHAR *base;
+
+    if (STRINGLIB_RESIZE(&output->obj, newsize) < 0)
+        return 0;
+
+    /* re-read the buffer address after the resize and rebuild both
+       pointers from it */
+    base = STRINGLIB_STR(output->obj);
+    output->ptr = base + used;
+    output->end = base + newsize;
+
+    /* ask for more spare room next time, up to the cap */
+    if (output->size_increment < MAX_SIZE_INCREMENT)
+        output->size_increment *= SIZE_MULTIPLIER;
+    return 1;
+}
+
+/*
+ output_data dumps characters into our output string
+ buffer.
+
+ In some cases, it has to reallocate the string.
+
+ It returns a status: 0 for a failed reallocation,
+ 1 for success.
+*/
+static int
+output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
+{
+    Py_ssize_t room = output->end - output->ptr;
+
+    /* grow the buffer only when the data does not fit */
+    if (count > room) {
+        if (!output_extend(output, count))
+            return 0;
+    }
+
+    memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
+    output->ptr += count;
+    return 1;
+}
+
+/************************************************************************/
+/*********** Format string parsing -- integers and identifiers *********/
+/************************************************************************/
+
+/*
+ end_identifier returns true if a character marks
+ the end of an identifier string.
+
+ Although the PEP specifies that identifiers are
+ numbers or valid Python identifiers, we just let
+ getattr/getitem handle that, so the implementation
+ is more flexible than the PEP would indicate.
+*/
+Py_LOCAL_INLINE(int)
+end_identifier(STRINGLIB_CHAR c)
+{
+    /* '.', '[' and ']' are the only characters that end an identifier */
+    return c == '.' || c == '[' || c == ']';
+}
+
+/*
+    get_integer consumes 0 or more decimal digit characters from an
+    input string, updates *result with the corresponding positive
+    integer, and returns the number of digits consumed.
+
+    *ptr is advanced past every digit consumed; scanning stops at end
+    or at the first character that is not a decimal digit.
+
+    returns -1 on error (ValueError is set and *result is left
+    untouched) when the value does not fit in a Py_ssize_t.
+*/
+static int
+get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
+            Py_ssize_t *result)
+{
+    Py_ssize_t accumulator, digitval;
+    int numdigits;
+    accumulator = numdigits = 0;
+    for (;;(*ptr)++, numdigits++) {
+        if (*ptr >= end)
+            break;
+        digitval = STRINGLIB_TODECIMAL(**ptr);
+        if (digitval < 0)
+            break;
+        /*
+           Detect overflow before it happens, instead of multiplying
+           first and inspecting the (undefined) wrapped result:
+
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX
+           if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10
+        */
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
+            PyErr_Format(PyExc_ValueError,
+                         "Too many decimal digits in format string");
+            return -1;
+        }
+        accumulator = accumulator * 10 + digitval;
+    }
+    *result = accumulator;
+    return numdigits;
+}
+
+/*
+ get_identifier is a bit of a misnomer. It returns a value for use
+ with getattr or getindex. This value will be a string/unicode
+ object. The input cannot be zero length. Continues until end of
+ input, or end_identifier() returns true.
+*/
+static PyObject *
+get_identifier(SubString *input)
+{
+    STRINGLIB_CHAR *first = input->ptr;
+
+    /* advance to the end of the input or to the first character
+       that end_identifier() accepts, whichever comes first */
+    while (input->ptr < input->end && !end_identifier(*input->ptr))
+        input->ptr++;
+
+    return STRINGLIB_NEW(first, input->ptr - first);
+
+    /*
+        We might want to add code here to check for invalid Python
+        identifiers.  All identifiers are eventually passed to getattr
+        or getitem, so there is a check when used.  However, we might
+        want to remove (or not) the ability to have strings like
+        "a/b" or " ab" or "-1" (which is not parsed as a number).
+        For now, this is left as an exercise for the first disgruntled
+        user...
+
+    if (XXX -- need check function) {
+        Py_DECREF(result);
+        PyErr_SetString(PyExc_ValueError,
+                        "Invalid embedded Python identifier");
+        return NULL;
+    }
+    */
+}
+
+/************************************************************************/
+/******** Functions to get field objects and specification strings ******/
+/************************************************************************/
+
+/* get_field_object is the main function in this section. It parses
+ the field name well enough to return the field object to render.
+ (The field specification string is split off separately, by
+ parse_field.)
+*/
+
+/*
+    Look up key in the keyword arguments.  Returns a new reference to
+    the value, or NULL when kwargs is NULL or the lookup finds
+    nothing; this function does not itself set an exception.
+*/
+static PyObject *
+key_lookup(PyObject *kwargs, PyObject *key)
+{
+    PyObject *found;
+
+    if (kwargs == NULL)
+        return NULL;
+
+    found = PyDict_GetItem(kwargs, key);
+    if (found == NULL)
+        return NULL;
+
+    /* hand the caller its own reference */
+    Py_INCREF(found);
+    return found;
+}
+
+/*
+    get_field_object returns the object inside {}, before the
+    format_spec.  It handles getindex and getattr lookups and consumes
+    the entire input string.
+
+    input is the field name (e.g. "0", "name", "0.attr", "name[key]").
+    The first component is looked up in args (when it starts with a
+    decimal digit) or in kwargs (otherwise); each following ".name" is
+    resolved with getattr and each "[key]" with getitem.
+
+    Returns a new reference, or NULL with an exception set.
+*/
+static PyObject *
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
+{
+    PyObject *myobj, *subobj, *newobj;
+    STRINGLIB_CHAR c;
+    Py_ssize_t index;
+    int isindex, isnumeric, isargument;
+
+    index = isnumeric = 0;  /* Just to shut up the compiler warnings */
+
+    /* myobj is the object resolved so far; we always own a reference */
+    myobj = args;
+    Py_INCREF(myobj);
+
+    /* the first component is treated as an index into the arguments */
+    for (isindex=1, isargument=1;;) {
+        if (!check_input(input))
+            break;
+        if (!isindex) {
+            /* ".name": attribute lookup */
+            if ((subobj = get_identifier(input)) == NULL)
+                break;
+            newobj = PyObject_GetAttr(myobj, subobj);
+            Py_DECREF(subobj);
+        } else {
+            isnumeric = (STRINGLIB_ISDECIMAL(*input->ptr));
+            if (isnumeric &&
+                get_integer(&input->ptr, input->end, &index) < 0) {
+                /* too many digits: get_integer has already set
+                   ValueError, and myobj is still non-NULL so the
+                   error is not replaced by the generic one below */
+                break;
+            }
+
+            if (isnumeric && PySequence_Check(myobj))
+                newobj = PySequence_GetItem(myobj, index);
+            else {
+                /* index is a Py_ssize_t, so build the key with the
+                   Py_ssize_t constructor rather than going through
+                   long, which may be narrower */
+                subobj = isnumeric ?
+                          PyInt_FromSsize_t(index) :
+                          get_identifier(input);
+                if (subobj == NULL)
+                    break;
+                if (isargument) {
+                    newobj = key_lookup(kwargs, subobj);
+                } else {
+                    newobj = PyObject_GetItem(myobj, subobj);
+                }
+                Py_DECREF(subobj);
+            }
+        }
+        Py_DECREF(myobj);
+        myobj = newobj;
+        if (myobj == NULL)
+            break;
+        if (!isargument && isindex) {
+            /* a "[key" lookup must be closed by ']' */
+            if ((!check_input(input)) || (*(input->ptr++) != ']')) {
+                SetError("Expected ]");
+                break;
+            }
+        }
+
+        /* if at the end of input, return with myobj */
+        if (input->ptr >= input->end)
+            return myobj;
+
+        /* the next character selects the kind of the next lookup */
+        c = *input->ptr;
+        input->ptr++;
+        isargument = 0;
+        isindex = (c == '[');
+        if (!isindex && (c != '.')) {
+            SetError("Expected ., [, :, !, or }");
+            break;
+        }
+    }
+    if ((myobj == NULL) && isargument) {
+        /* XXX: include more useful error information, like which
+         * keyword not found or which index missing */
+        PyErr_Clear();
+        return SetError(isnumeric
+                        ? "Not enough positional arguments"
+                        : "Keyword argument not found");
+    }
+    Py_XDECREF(myobj);
+    return NULL;
+}
+
+/************************************************************************/
+/***************** Field rendering functions **************************/
+/************************************************************************/
+
+/*
+ render_field() is the main function in this section. It takes the
+ field object returned by get_field_object and the field specification
+ string split off by parse_field, and renders the field into the
+ output string.
+
+ format() does the actual calling of the object's __format__ method.
+*/
+
+
+/* Calls type(fieldobj).__format__(fieldobj, format_spec) and returns
+   the result as a new reference.  Returns NULL with an exception set
+   if the type has no __format__, if the call fails, or if the result
+   is not of the string type this file is compiled for. */
+static PyObject *
+format(PyObject *fieldobj, SubString *format_spec)
+{
+    /* cached "__format__" name object, created on first use */
+    static PyObject *format_str = NULL;
+    PyTypeObject *type = Py_Type(fieldobj);
+    PyObject *method;
+    PyObject *spec_obj;
+    PyObject *formatted;
+
+    if (format_str == NULL) {
+        /* needed as the lookup key for _PyType_Lookup */
+        format_str = PyUnicode_FromString("__format__");
+        if (format_str == NULL)
+            return NULL;
+    }
+
+    /* Make sure the type is initialized.  float gets initialized late */
+    if (type->tp_dict == NULL && PyType_Ready(type) < 0)
+        return NULL;
+
+    /* turn the pair of pointers into a real string object */
+    spec_obj = SubString_new_object(format_spec);
+    if (spec_obj == NULL)
+        return NULL;
+
+    /* Find the (unbound!) __format__ method (a borrowed reference) */
+    method = _PyType_Lookup(type, format_str);
+    if (method == NULL) {
+        PyErr_Format(PyExc_TypeError,
+                     "Type %.100s doesn't define __format__",
+                     type->tp_name);
+        Py_DECREF(spec_obj);
+        return NULL;
+    }
+
+    /* call it with the value as the explicit first argument */
+    formatted = PyObject_CallFunctionObjArgs(method, fieldobj, spec_obj, NULL);
+    Py_DECREF(spec_obj);
+
+    if (formatted != NULL && !STRINGLIB_CHECK(formatted)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "__format__ method did not return "
+                        STRINGLIB_TYPE_NAME);
+        Py_DECREF(formatted);
+        formatted = NULL;
+    }
+    return formatted;
+}
+
+/*
+    render_field formats fieldobj with format_spec (through format())
+    and appends the resulting characters to output.  Returns 1 on
+    success and 0 on failure.
+*/
+static int
+render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
+{
+    int status;
+    PyObject *text = format(fieldobj, format_spec);
+
+    if (text == NULL)
+        return 0;
+
+    status = output_data(output, STRINGLIB_STR(text), STRINGLIB_LEN(text));
+    Py_DECREF(text);
+    return status;
+}
+
+/*
+ parse_field splits the text of one replacement field (str) into
+ field_name, an optional one-character conversion (the character
+ following '!') and format_spec (the text following ':').
+
+ When they are absent, *conversion is '\0' and format_spec has both
+ pointers NULL. str->ptr is advanced while scanning for the end of
+ the field name.
+
+ Returns 1 on success, 0 with ValueError set when a '!' is not
+ followed by a conversion character, or the conversion character is
+ followed by something other than ':'.
+*/
+static int
+parse_field(SubString *str, SubString *field_name, SubString *format_spec,
+ STRINGLIB_CHAR *conversion)
+{
+ STRINGLIB_CHAR c = 0;
+
+ /* initialize these, as they may be empty */
+ *conversion = '\0';
+ SubString_init(format_spec, NULL, 0);
+
+ /* search for the field name. it's terminated by the end of the
+ string, or a ':' or '!' */
+ field_name->ptr = str->ptr;
+ while (str->ptr < str->end) {
+ switch (c = *(str->ptr++)) {
+ case ':':
+ case '!':
+ break;
+ default:
+ continue;
+ }
+ /* only reached for ':' or '!': leave the while loop */
+ break;
+ }
+
+ if (c == '!' || c == ':') {
+ /* we have a format specifier and/or a conversion */
+ /* don't include the last character */
+ field_name->end = str->ptr-1;
+
+ /* the format specifier is the rest of the string */
+ format_spec->ptr = str->ptr;
+ format_spec->end = str->end;
+
+ /* see if there's a conversion specifier */
+ if (c == '!') {
+ /* there must be another character present */
+ if (format_spec->ptr >= format_spec->end) {
+ PyErr_SetString(PyExc_ValueError,
+ "end of format while looking for conversion "
+ "specifier");
+ return 0;
+ }
+ /* take the conversion character off the front of format_spec */
+ *conversion = *(format_spec->ptr++);
+
+ /* if there is another character, it must be a colon */
+ if (format_spec->ptr < format_spec->end) {
+ c = *(format_spec->ptr++);
+ if (c != ':') {
+ PyErr_SetString(PyExc_ValueError,
+ "expected ':' after format specifier");
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+
+ } else {
+ /* end of string, there's no format_spec or conversion */
+ field_name->end = str->ptr;
+ return 1;
+ }
+}
+
+/************************************************************************/
+/******* Output string allocation and escape-to-markup processing ******/
+/************************************************************************/
+
+/* MarkupIterator breaks the string into pieces of either literal
+ text, or things inside {} that need to be marked up. it is
+ designed to make it easy to wrap a Python iterator around it, for
+ use with the Formatter class */
+
+typedef struct {
+ SubString str; /* the part of the format string not yet consumed */
+ int in_markup; /* non-zero when the next piece starts inside a {} field */
+} MarkupIterator;
+
+/* Start iterating over the len characters at ptr, beginning in
+   literal-text mode.  Always returns 1. */
+static int
+MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
+{
+    self->in_markup = 0;
+    SubString_init(&self->str, ptr, len);
+    return 1;
+}
+
+/* returns 0 on error, 1 on non-error termination, and 2 if it got a
+   string (or something to be expanded).
+
+   On a return of 2, *is_markup tells which kind of piece was found:
+     - zero: literal holds plain text to copy to the output (a doubled
+       brace has already been reduced to a single one);
+     - non-zero: field_name, format_spec and *conversion describe a
+       replacement field, as filled in by parse_field.  literal is
+       used as scratch space in this case.
+   *format_spec_needs_expanding is set when the field contains a
+   nested '{'. */
+static int
+MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal,
+                    SubString *field_name, SubString *format_spec,
+                    STRINGLIB_CHAR *conversion,
+                    int *format_spec_needs_expanding)
+{
+    int at_end;
+    STRINGLIB_CHAR c = 0;
+    STRINGLIB_CHAR *start;
+    int count;
+    Py_ssize_t len;
+
+    *format_spec_needs_expanding = 0;
+
+    /* no more input, end of iterator */
+    if (self->str.ptr >= self->str.end)
+        return 1;
+
+    *is_markup = self->in_markup;
+    start = self->str.ptr;
+
+    if (self->in_markup) {
+
+        /* prepare for next iteration */
+        self->in_markup = 0;
+
+        /* this is markup, find the end of the string by counting nested
+           braces.  note that this prohibits escaped braces, so that
+           format_specs cannot have braces in them. */
+        count = 1;
+
+        /* we know we can't have a zero length string, so don't worry
+           about that case */
+        while (self->str.ptr < self->str.end) {
+            switch (c = *(self->str.ptr++)) {
+            case '{':
+                /* the format spec needs to be recursively expanded.
+                   this is an optimization, and not strictly needed */
+                *format_spec_needs_expanding = 1;
+                count++;
+                break;
+            case '}':
+                count--;
+                if (count <= 0) {
+                    /* we're done.  parse and get out; the field text
+                       excludes the closing '}' */
+                    literal->ptr = start;
+                    literal->end = self->str.ptr-1;
+
+                    if (parse_field(literal, field_name, format_spec,
+                                    conversion) == 0)
+                        return 0;
+
+                    /* success */
+                    return 2;
+                }
+                break;
+            }
+        }
+        /* end of string while searching for matching '}' */
+        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
+        return 0;
+
+    } else {
+        /* literal text, read until the end of string, an escaped { or },
+           or an unescaped { */
+        while (self->str.ptr < self->str.end) {
+            switch (c = *(self->str.ptr++)) {
+            case '{':
+            case '}':
+                self->in_markup = 1;
+                break;
+            default:
+                continue;
+            }
+            break;
+        }
+
+        at_end = self->str.ptr >= self->str.end;
+        len = self->str.ptr - start;
+
+        /* SetError returns a NULL pointer; return the int status
+           explicitly instead of casting that pointer to int */
+        if ((c == '}') && (at_end || (c != *self->str.ptr))) {
+            SetError("Single } encountered");
+            return 0;
+        }
+        if (at_end && c == '{') {
+            SetError("Single { encountered");
+            return 0;
+        }
+        if (!at_end) {
+            if (c == *self->str.ptr) {
+                /* escaped } or {, skip it in the input */
+                self->str.ptr++;
+                self->in_markup = 0;
+            } else
+                /* a lone '{' opens a field: drop it from the literal */
+                len--;
+        }
+
+        /* this is just plain text, return it */
+        literal->ptr = start;
+        literal->end = start + len;
+        return 2;
+    }
+}
+
+
+/* do the !r or !s conversion on obj.
+
+   conversion is the character that followed '!' in the replacement
+   field.  Returns a new reference to repr(obj) for 'r' or to the
+   unicode form of obj for 's'; any other character raises ValueError
+   and returns NULL. */
+static PyObject *
+do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
+{
+    /* XXX in pre-3.0, do we need to convert this to unicode, since it
+       might have returned a string? */
+    switch (conversion) {
+    case 'r':
+        return PyObject_Repr(obj);
+    case 's':
+        return PyObject_Unicode(obj);
+    default:
+        PyErr_Format(PyExc_ValueError,
+                     "Unknown conversion specifier %c",
+                     conversion);
+        return NULL;
+    }
+}
+
+/* given:
+
+       {field_name!conversion:format_spec}
+
+   look up the field object, apply the conversion (if any), format
+   the object and append the text to output.
+
+   format_spec_needs_expanding is an optimization: when it is false
+   format_spec is used as is, otherwise it is first expanded
+   recursively through build_string.
+
+   Returns 1 on success, 0 on failure. */
+
+static int
+output_markup(SubString *field_name, SubString *format_spec,
+              int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
+              OutputString *output, PyObject *args, PyObject *kwargs,
+              int *recursion_level)
+{
+    PyObject *fieldobj;
+    PyObject *expanded = NULL;  /* owns the expanded format_spec, if any */
+    SubString expanded_spec;
+    SubString *spec_to_use = format_spec;
+    int ok = 0;
+
+    /* convert field_name to an object */
+    fieldobj = get_field_object(field_name, args, kwargs);
+    if (fieldobj == NULL)
+        return 0;
+
+    if (conversion != '\0') {
+        PyObject *converted = do_conversion(fieldobj, conversion);
+
+        /* the original object is no longer needed either way */
+        Py_DECREF(fieldobj);
+        if (converted == NULL)
+            return 0;
+        fieldobj = converted;
+    }
+
+    /* if needed, recursively compute the format_spec */
+    if (format_spec_needs_expanding) {
+        expanded = build_string(format_spec, args, kwargs, recursion_level);
+        if (expanded == NULL)
+            goto finish;
+
+        /* expanded_spec points into expanded's buffer, so expanded
+           has to stay alive until render_field has returned */
+        SubString_init(&expanded_spec,
+                       STRINGLIB_STR(expanded), STRINGLIB_LEN(expanded));
+        spec_to_use = &expanded_spec;
+    }
+
+    ok = (render_field(fieldobj, spec_to_use, output) != 0);
+
+finish:
+    Py_DECREF(fieldobj);
+    Py_XDECREF(expanded);
+
+    return ok;
+}
+
+/*
+    do_markup is the top-level loop for the format() function.  It
+    walks the format string piece by piece: literal text is copied to
+    the output, replacement fields are handed to output_markup.
+
+    Returns 1 when the whole input was processed and 0 on error.
+*/
+static int
+do_markup(SubString *input, PyObject *args, PyObject *kwargs,
+          OutputString *output, int *recursion_level)
+{
+    MarkupIterator iter;
+    SubString literal;
+    SubString field_name;
+    SubString format_spec;
+    STRINGLIB_CHAR conversion;
+    int is_markup;
+    int needs_expanding;
+    int status;
+
+    MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
+    for (;;) {
+        status = MarkupIterator_next(&iter, &is_markup, &literal,
+                                     &field_name, &format_spec,
+                                     &conversion, &needs_expanding);
+        /* anything but 2 ends the loop: 0 is an error, 1 is the
+           normal end of input */
+        if (status != 2)
+            return status;
+
+        if (is_markup) {
+            if (!output_markup(&field_name, &format_spec,
+                               needs_expanding, conversion, output,
+                               args, kwargs, recursion_level))
+                return 0;
+        }
+        else if (!output_data(output, literal.ptr,
+                              literal.end - literal.ptr))
+            return 0;
+    }
+}
+
+
+/*
+    build_string allocates the output string, lets do_markup fill it
+    in, and trims it to the number of characters actually written.
+
+    *recursion_level is decremented for the duration of the call; a
+    value that drops below zero raises ValueError.
+
+    Returns a new reference, or NULL with an exception set.
+*/
+static PyObject *
+build_string(SubString *input, PyObject *args, PyObject *kwargs,
+             int *recursion_level)
+{
+    OutputString output;
+    PyObject *result = NULL;
+
+    output.obj = NULL; /* so the cleanup at the bottom always works */
+
+    if (--(*recursion_level) < 0) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Max string recursion exceeded");
+    }
+    /* initial size is the length of the format string, plus the size
+       increment.  seems like a reasonable default */
+    else if (output_initialize(&output,
+                               input->end - input->ptr +
+                               INITIAL_SIZE_INCREMENT)
+             && do_markup(input, args, kwargs, &output, recursion_level)) {
+        /* cut the buffer down to what was written */
+        Py_ssize_t used = output.ptr - STRINGLIB_STR(output.obj);
+
+        if (STRINGLIB_RESIZE(&output.obj, used) >= 0) {
+            /* transfer ownership to result */
+            result = output.obj;
+            output.obj = NULL;
+        }
+    }
+
+    /* restore the level for our caller, on success and on failure */
+    (*recursion_level)++;
+    Py_XDECREF(output.obj);
+    return result;
+}
+
+/************************************************************************/
+/*********** main routine ***********************************************/
+/************************************************************************/
+
+/* main entry point: expand the format string self using the
+   positional arguments args and the keyword arguments kwargs */
+static PyObject *
+do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    /* PEP 3101 says only 2 levels, so that
+       "{0:{1}}".format('abc', 's')            # works
+       "{0:{1:{2}}}".format('abc', 's', '')    # fails
+    */
+    int levels_left = 2;
+    SubString fmt;
+
+    SubString_init(&fmt, STRINGLIB_STR(self), STRINGLIB_LEN(self));
+    return build_string(&fmt, args, kwargs, &levels_left);
+}
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
new file mode 100644
index 0000000..af9bbd6
--- /dev/null
+++ b/Objects/stringlib/stringdefs.h
@@ -0,0 +1,23 @@
+#ifndef STRINGLIB_STRINGDEFS_H
+#define STRINGLIB_STRINGDEFS_H
+
+/* this is sort of a hack.  there's at least one place (formatting
+   floats) where some stringlib code takes a different path if it's
+   compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     0
+
+/* stringlib definitions for the 8-bit string type */
+#define STRINGLIB_CHAR           char
+#define STRINGLIB_TYPE_NAME      "string"
+#define STRINGLIB_EMPTY          string_empty
+/* the argument is parenthesized so that any expression can be passed
+   safely; note that it is still evaluated more than once */
+#define STRINGLIB_ISDECIMAL(x)   (((x) >= '0') && ((x) <= '9'))
+#define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? ((x) - '0') : -1)
+#define STRINGLIB_FILL           memset
+#define STRINGLIB_STR            PyString_AS_STRING
+#define STRINGLIB_LEN            PyString_GET_SIZE
+#define STRINGLIB_NEW            PyString_FromStringAndSize
+#define STRINGLIB_RESIZE         _PyString_Resize
+#define STRINGLIB_CHECK          PyString_Check
+#define STRINGLIB_CMP            memcmp
+#define STRINGLIB_TOSTR          PyObject_Str
+
+#endif /* !STRINGLIB_STRINGDEFS_H */
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
new file mode 100644
index 0000000..1fac2c3
--- /dev/null
+++ b/Objects/stringlib/unicodedefs.h
@@ -0,0 +1,32 @@
+#ifndef STRINGLIB_UNICODEDEFS_H
+#define STRINGLIB_UNICODEDEFS_H
+
+/* this is sort of a hack. there's at least one place (formatting
+ floats) where some stringlib code takes a different path if it's
+ compiled as unicode. */
+#define STRINGLIB_IS_UNICODE 1
+
+/* character type, and the type name spliced into error messages */
+#define STRINGLIB_CHAR Py_UNICODE
+#define STRINGLIB_TYPE_NAME "unicode"
+#define STRINGLIB_EMPTY unicode_empty
+/* per-character classification and conversion */
+#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL
+#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
+#define STRINGLIB_TOUPPER Py_UNICODE_TOUPPER
+#define STRINGLIB_TOLOWER Py_UNICODE_TOLOWER
+#define STRINGLIB_FILL Py_UNICODE_FILL
+/* object-level operations: buffer access, length, creation, resize,
+ type check and conversion of an arbitrary object */
+#define STRINGLIB_STR PyUnicode_AS_UNICODE
+#define STRINGLIB_LEN PyUnicode_GET_SIZE
+#define STRINGLIB_NEW PyUnicode_FromUnicode
+#define STRINGLIB_RESIZE PyUnicode_Resize
+#define STRINGLIB_CHECK PyUnicode_Check
+#define STRINGLIB_TOSTR PyObject_Unicode
+
+/* Compare len characters of str and other.  Returns 0 when they are
+   equal; the first characters are compared before falling back to
+   memcmp over the whole range. */
+Py_LOCAL_INLINE(int)
+STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
+{
+    if (str[0] == other[0])
+        return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE));
+    return 1;
+}
+
+#endif /* !STRINGLIB_UNICODEDEFS_H */
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 222207c..4e5e09d 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -2933,11 +2933,52 @@ object_reduce_ex(PyObject *self, PyObject *args)
return _common_reduce(self, proto);
}
+
+/*
+   from PEP 3101, this code implements:
+
+   class object:
+       def __format__(self, format_spec):
+           return format(str(self), format_spec)
+
+   args must hold exactly one unicode format_spec.  Returns a new
+   reference, or NULL with an exception set.
+*/
+static PyObject *
+object_format(PyObject *self, PyObject *args)
+{
+    PyObject *spec;
+    PyObject *text;
+    PyObject *bound_format;
+    PyObject *formatted;
+
+    if (!PyArg_ParseTuple(args, "O:__format__", &spec))
+        return NULL;
+    if (!PyUnicode_Check(spec)) {
+        PyErr_SetString(PyExc_TypeError, "Unicode object required");
+        return NULL;
+    }
+
+    /* str(self) */
+    text = PyObject_Unicode(self);
+    if (text == NULL)
+        return NULL;
+
+    /* find the format function of the string form */
+    bound_format = PyObject_GetAttrString(text, "__format__");
+    if (bound_format == NULL) {
+        Py_DECREF(text);
+        return NULL;
+    }
+
+    /* and call it; a NULL result is passed straight to our caller */
+    formatted = PyObject_CallFunctionObjArgs(bound_format, spec, NULL);
+
+    Py_DECREF(text);
+    Py_DECREF(bound_format);
+    return formatted;
+}
+
static PyMethodDef object_methods[] = {
{"__reduce_ex__", object_reduce_ex, METH_VARARGS,
PyDoc_STR("helper for pickle")},
{"__reduce__", object_reduce, METH_VARARGS,
PyDoc_STR("helper for pickle")},
+ {"__format__", object_format, METH_VARARGS,
+ PyDoc_STR("default object formatter")},
{0}
};
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e227fc7..3052ebd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -45,6 +45,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#include "unicodeobject.h"
#include "ucnhash.h"
+#include "formatter_unicode.h"
+
#ifdef MS_WINDOWS
#include <windows.h>
#endif
@@ -5009,21 +5011,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
/* --- Helpers ------------------------------------------------------------ */
-#define STRINGLIB_CHAR Py_UNICODE
-
-#define STRINGLIB_LEN PyUnicode_GET_SIZE
-#define STRINGLIB_NEW PyUnicode_FromUnicode
-#define STRINGLIB_STR PyUnicode_AS_UNICODE
-
-Py_LOCAL_INLINE(int)
-STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
-{
- if (str[0] != other[0])
- return 1;
- return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE));
-}
-
-#define STRINGLIB_EMPTY unicode_empty
+#include "stringlib/unicodedefs.h"
#include "stringlib/fastsearch.h"
@@ -7964,6 +7952,33 @@ unicode_endswith(PyUnicodeObject *self,
return PyBool_FromLong(result);
}
+#include "stringlib/string_format.h"
+
+PyDoc_STRVAR(format__doc__,
+"S.format(*args, **kwargs) -> unicode\n\
+\n\
+");
+
+/* unicode.format(*args, **kwargs): forwards self, the positional
+ argument tuple and the keyword argument dict unchanged to
+ do_string_format() and returns its result. */
+static PyObject *
+unicode_format(PyObject *self, PyObject *args, PyObject *kwds)
+{
+ /* this calls into stringlib/string_format.h because it can be
+ included for either string or unicode. this is needed for
+ python 2.6. */
+ return do_string_format(self, args, kwds);
+}
+
+
+PyDoc_STRVAR(p_format__doc__,
+"S.__format__(format_spec) -> unicode\n\
+\n\
+");
+
+/* unicode.__format__(format_spec): forwards self and the argument
+ tuple unchanged to unicode_unicode__format__() and returns its
+ result. */
+static PyObject *
+unicode__format__(PyObject *self, PyObject *args)
+{
+ return unicode_unicode__format__(self, args);
+}
static PyObject *
@@ -8019,6 +8034,8 @@ static PyMethodDef unicode_methods[] = {
{"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
{"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
{"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
+ {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
+ {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
#endif
@@ -9124,6 +9141,205 @@ void _Py_ReleaseInternedUnicodeStrings(void)
}
+/********************* Formatter Iterator ************************/
+
+/* this is used to implement string.Formatter.vparse(). it exists so
+ Formatter can share code with the built in unicode.format()
+ method */
+
+typedef struct {
+ PyObject_HEAD
+
+ /* The string being iterated over.  We know this to be a unicode
+ object, but since we just keep it around to keep the object alive
+ (the SubStrings produced while iterating point into it), having
+ it as PyObject is okay.  Released in formatteriter_dealloc(). */
+ PyObject *str;
+ /* Parse state: set up by MarkupIterator_init() over str's buffer and
+ handed to MarkupIterator_next() on every formatteriter_next() call. */
+ MarkupIterator it_markup;
+} formatteriterobject;
+
+/* tp_dealloc for formatteriterator objects: remove the object from GC
+   tracking, drop the reference held on the iterated string, then free
+   the memory obtained from PyObject_GC_New(). */
+static void
+formatteriter_dealloc(formatteriterobject *it)
+{
+ _PyObject_GC_UNTRACK(it);
+ Py_XDECREF(it->str);
+ PyObject_GC_Del(it);
+}
+
+/* tp_iternext: parse the next chunk of it->str.
+
+   returns a new reference to a tuple:
+   (is_markup, literal, field_name, format_spec, conversion)
+   if is_markup == True:
+       literal is None
+       field_name is the string before the ':'
+       format_spec is the string after the ':'
+       conversion is either None, or the string after the '!'
+   if is_markup == False:
+       literal is the literal string
+       field_name is None
+       format_spec is None
+       conversion is None
+
+   returns NULL at the end of the iteration, and NULL with an exception
+   set when parsing or building the tuple fails.
+*/
+static PyObject *
+formatteriter_next(formatteriterobject *it)
+{
+    SubString literal;
+    SubString field_name;
+    SubString format_spec;
+    Py_UNICODE conversion;
+    int is_markup;
+    int format_spec_needs_expanding;
+    PyObject *is_markup_bool = NULL;
+    PyObject *literal_str = NULL;
+    PyObject *field_name_str = NULL;
+    PyObject *format_spec_str = NULL;
+    PyObject *conversion_str = NULL;
+    /* named "tuple" rather than "result" so that it cannot shadow the
+       integer status below (the assert must test the status) */
+    PyObject *tuple = NULL;
+    int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
+                                     &field_name, &format_spec, &conversion,
+                                     &format_spec_needs_expanding);
+
+    /* all of the SubString objects point into it->str, so no
+       memory management needs to be done on them */
+
+    if (result == 0) {
+        /* error has already been set */
+        return NULL;
+    }
+    if (result == 1) {
+        /* end of iterator */
+        return NULL;
+    }
+
+    assert(result == 2);
+
+    is_markup_bool = PyBool_FromLong(is_markup);
+    if (is_markup_bool == NULL)
+        goto done;
+
+    if (is_markup) {
+        /* field_name, format_spec, and conversion are
+           returned */
+        literal_str = Py_None;
+        Py_INCREF(literal_str);
+
+        field_name_str = SubString_new_object(&field_name);
+        if (field_name_str == NULL)
+            goto done;
+
+        format_spec_str = SubString_new_object(&format_spec);
+        if (format_spec_str == NULL)
+            goto done;
+
+        /* if the conversion is not specified, return
+           a None, otherwise create a one length
+           string with the conversion character */
+        if (conversion == '\0') {
+            conversion_str = Py_None;
+            Py_INCREF(conversion_str);
+        } else
+            conversion_str = PyUnicode_FromUnicode(&conversion,
+                                                   1);
+        if (conversion_str == NULL)
+            goto done;
+    } else {
+        /* only literal is returned */
+        literal_str = SubString_new_object(&literal);
+        if (literal_str == NULL)
+            goto done;
+
+        field_name_str = Py_None;
+        format_spec_str = Py_None;
+        conversion_str = Py_None;
+
+        Py_INCREF(field_name_str);
+        Py_INCREF(format_spec_str);
+        Py_INCREF(conversion_str);
+    }
+
+    /* return a tuple of values; on failure tuple stays NULL with the
+       exception set by PyTuple_Pack */
+    tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
+                         field_name_str, format_spec_str,
+                         conversion_str);
+
+done:
+    /* PyTuple_Pack takes its own references to the items, so our
+       references must be released on success as well as on error;
+       otherwise every call leaks five references */
+    Py_XDECREF(is_markup_bool);
+    Py_XDECREF(literal_str);
+    Py_XDECREF(field_name_str);
+    Py_XDECREF(format_spec_str);
+    Py_XDECREF(conversion_str);
+    return tuple;
+}
+
+/* Method table for formatteriterator objects (installed as tp_methods of
+   PyFormatterIter_Type).  It contains only the terminating sentinel. */
+static PyMethodDef formatteriter_methods[] = {
+ {NULL, NULL} /* sentinel */
+};
+
+/* tp_traverse: report the only object reference an iterator holds.
+
+   Instances are allocated with PyObject_GC_New() and registered with
+   _PyObject_GC_TRACK(), so the collector will call tp_traverse on them;
+   the slot must therefore not be left NULL. */
+static int
+formatteriter_traverse(formatteriterobject *it, visitproc visit, void *arg)
+{
+    Py_VISIT(it->str);
+    return 0;
+}
+
+/* Type object for the iterator returned by _unicodeformatter_iterator().
+   Each step yields the 5-tuple built by formatteriter_next(). */
+PyTypeObject PyFormatterIter_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "formatteriterator",                /* tp_name */
+    sizeof(formatteriterobject),        /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)formatteriter_dealloc,  /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_compare */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    /* HAVE_GC must match the PyObject_GC_New / PyObject_GC_Del pair
+       used to allocate and free instances */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
+    0,                                  /* tp_doc */
+    (traverseproc)formatteriter_traverse, /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)formatteriter_next,   /* tp_iternext */
+    formatteriter_methods,              /* tp_methods */
+    0,                                  /* tp_members */
+};
+
+/* Create a formatteriterator over str.
+
+   str is read with PyUnicode_AS_UNICODE / PyUnicode_GET_SIZE, so it is
+   expected to be a unicode object; no type check is made here.
+   Returns a new reference, or NULL if the iterator cannot be allocated. */
+PyObject *
+_unicodeformatter_iterator(PyObject *str)
+{
+    formatteriterobject *iter = PyObject_GC_New(formatteriterobject,
+                                                &PyFormatterIter_Type);
+
+    if (iter != NULL) {
+        /* the iterator keeps its own reference to the string; the
+           caller's reference is left untouched */
+        Py_INCREF(str);
+        iter->str = str;
+
+        /* point the contained MarkupIterator at the string's buffer */
+        MarkupIterator_init(&iter->it_markup,
+                            PyUnicode_AS_UNICODE(str),
+                            PyUnicode_GET_SIZE(str));
+
+        /* only start GC tracking once every field is initialized */
+        _PyObject_GC_TRACK(iter);
+    }
+    return (PyObject *)iter;
+}
+
+/* Placeholder for field lookup: not implemented yet.
+
+   All three arguments are ignored.  Always returns NULL with
+   NotImplementedError set, so that callers see a proper exception
+   instead of an error return without an exception. */
+PyObject *
+_unicodeformatter_lookup(PyObject *field_name, PyObject *args,
+                         PyObject *kwargs)
+{
+    PyErr_SetString(PyExc_NotImplementedError,
+                    "_unicodeformatter_lookup is not implemented");
+    return NULL;
+}
+
+
/********************* Unicode Iterator **************************/
typedef struct {