From dc13b79a384691ed966f760a58d73a1b835e7d6b Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Fri, 30 May 2008 18:10:04 +0000 Subject: Refactor and clean up str.format() code (and helpers) in advance of optimizations. --- Include/bytesobject.h | 6 +++ Include/floatobject.h | 6 +++ Include/formatter_string.h | 12 ----- Include/formatter_unicode.h | 12 ----- Include/intobject.h | 6 +++ Include/longobject.h | 6 +++ Include/unicodeobject.h | 6 +++ Objects/bytesobject.c | 32 ++++++++++-- Objects/floatobject.c | 29 ++++------- Objects/intobject.c | 27 ++++------ Objects/longobject.c | 27 ++++------ Objects/stringlib/formatter.h | 116 +++++++++++++++++------------------------- Objects/unicodeobject.c | 31 ++++++++++- Python/formatter_string.c | 11 ++-- Python/formatter_unicode.c | 6 +-- 15 files changed, 176 insertions(+), 157 deletions(-) delete mode 100644 Include/formatter_string.h delete mode 100644 Include/formatter_unicode.h diff --git a/Include/bytesobject.h b/Include/bytesobject.h index 941577a..cb31da8 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -188,6 +188,12 @@ PyAPI_FUNC(int) _PyBytes_InsertThousandsGrouping(char *buffer, Py_ssize_t *count, int append_zero_char); +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyBytes_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + #ifdef __cplusplus } #endif diff --git a/Include/floatobject.h b/Include/floatobject.h index 84398c2..04978be 100644 --- a/Include/floatobject.h +++ b/Include/floatobject.h @@ -115,6 +115,12 @@ PyAPI_FUNC(double) _PyFloat_Unpack8(const unsigned char *p, int le); /* free list api */ PyAPI_FUNC(void) PyFloat_CompactFreeList(size_t *, size_t *, size_t *); +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + #ifdef __cplusplus } #endif diff --git a/Include/formatter_string.h b/Include/formatter_string.h deleted file mode 100644 index 14c4811..0000000 --- a/Include/formatter_string.h +++ /dev/null @@ -1,12 +0,0 @@ -PyObject * -string__format__(PyObject *self, PyObject *args); - -PyObject * -string_long__format__(PyObject *self, PyObject *args); - -PyObject * -string_int__format__(PyObject *self, PyObject *args); - -PyObject * -string_float__format__(PyObject *self, PyObject *args); - diff --git a/Include/formatter_unicode.h b/Include/formatter_unicode.h deleted file mode 100644 index 51406ab..0000000 --- a/Include/formatter_unicode.h +++ /dev/null @@ -1,12 +0,0 @@ -PyObject * -unicode__format__(PyObject *self, PyObject *args); - -PyObject * -unicode_long__format__(PyObject *self, PyObject *args); - -PyObject * -unicode_int__format__(PyObject *self, PyObject *args); - -PyObject * -unicode_float__format__(PyObject *self, PyObject *args); - diff --git a/Include/intobject.h b/Include/intobject.h index 66a62d9..329ff45 100644 --- a/Include/intobject.h +++ b/Include/intobject.h @@ -68,6 +68,12 @@ PyAPI_FUNC(void) PyInt_CompactFreeList(size_t *, size_t *, size_t *); a leading "0" */ PyAPI_FUNC(PyObject*) _PyInt_Format(PyIntObject* v, int base, int newstyle); +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyInt_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + #ifdef __cplusplus } #endif diff --git a/Include/longobject.h b/Include/longobject.h index 8a9e676..fa511a7 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -119,6 +119,12 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, a leading "0", instead of the prefix "0o" */ PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base, int addL, int newstyle); +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, + char *format_spec, + Py_ssize_t format_spec_len); + #ifdef __cplusplus } #endif diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index d7ed0fa..057f770 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -553,6 +553,12 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromObject( PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, + Py_UNICODE *format_spec, + Py_ssize_t format_spec_len); + /* --- wchar_t support for platforms which support it --------------------- */ #ifdef HAVE_WCHAR_H diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 0f4d4c3..79c1e4f 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3,9 +3,6 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" - -#include "formatter_string.h" - #include #ifdef COUNT_ALLOCS @@ -3939,6 +3936,35 @@ PyDoc_STRVAR(format__doc__, \n\ "); +static PyObject * +string__format__(PyObject* self, PyObject* args) +{ + PyObject *format_spec; + PyObject *result = NULL; + PyObject *tmp = NULL; + + /* If 2.x, convert format_spec to the same type as value */ + /* This is to allow things like u''.format('') */ + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + goto done; + if (!(PyBytes_Check(format_spec) || PyUnicode_Check(format_spec))) { + PyErr_Format(PyExc_TypeError, "__format__ arg must be str " + "or unicode, not %s", Py_TYPE(format_spec)->tp_name); + goto done; + } + tmp = PyObject_Str(format_spec); + if (tmp == NULL) + goto done; + format_spec = tmp; + + result = _PyBytes_FormatAdvanced(self, + PyBytes_AS_STRING(format_spec), + PyBytes_GET_SIZE(format_spec)); +done: + Py_XDECREF(tmp); + return result; +} + PyDoc_STRVAR(p_format__doc__, "S.__format__(format_spec) -> unicode\n\ \n\ diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 3b49341..baf55aa 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -14,9 +14,6 @@ #include #endif -#include "formatter_string.h" - - #ifdef _OSF_SOURCE /* OSF1 5.1 doesn't make this available with XOPEN_SOURCE_EXTENDED defined */ extern int finite(double); @@ -1398,26 +1395,22 @@ float__format__(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) return NULL; if (PyBytes_Check(format_spec)) - return string_float__format__(self, args); + return _PyFloat_FormatAdvanced(self, + PyBytes_AS_STRING(format_spec), + PyBytes_GET_SIZE(format_spec)); if (PyUnicode_Check(format_spec)) { /* Convert format_spec to a str */ - PyObject *result = NULL; - PyObject *newargs = NULL; - PyObject *string_format_spec = NULL; + PyObject *result; + PyObject *str_spec = PyObject_Str(format_spec); - string_format_spec = PyObject_Str(format_spec); - if (string_format_spec == NULL) - goto done; - - newargs = Py_BuildValue("(O)", string_format_spec); - if (newargs == NULL) - goto done; + if (str_spec == NULL) + return NULL; - result = string_float__format__(self, newargs); + result = _PyFloat_FormatAdvanced(self, + PyBytes_AS_STRING(str_spec), + PyBytes_GET_SIZE(str_spec)); - done: - Py_XDECREF(string_format_spec); - Py_XDECREF(newargs); + Py_DECREF(str_spec); return result; } PyErr_SetString(PyExc_TypeError, "__format__ requires str or unicode"); diff --git a/Objects/intobject.c b/Objects/intobject.c index 2af9451..3b68640 100644 --- a/Objects/intobject.c +++ b/Objects/intobject.c @@ -3,7 +3,6 @@ #include "Python.h" #include -#include "formatter_string.h" static PyObject *int_int(PyIntObject *v); @@ -1117,26 +1116,22 @@ int__format__(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) return NULL; if (PyBytes_Check(format_spec)) - return string_int__format__(self, args); + return _PyInt_FormatAdvanced(self, + PyBytes_AS_STRING(format_spec), + PyBytes_GET_SIZE(format_spec)); if (PyUnicode_Check(format_spec)) { /* Convert format_spec to a str */ - PyObject *result = NULL; - PyObject *newargs = NULL; - PyObject *string_format_spec = NULL; + PyObject *result; + PyObject *str_spec = PyObject_Str(format_spec); - string_format_spec = PyObject_Str(format_spec); - if (string_format_spec == NULL) - goto done; - - newargs = Py_BuildValue("(O)", string_format_spec); - if (newargs == NULL) - goto done; + if (str_spec == NULL) + return NULL; - result = string_int__format__(self, newargs); + result = _PyInt_FormatAdvanced(self, + PyBytes_AS_STRING(str_spec), + PyBytes_GET_SIZE(str_spec)); - done: - Py_XDECREF(string_format_spec); - Py_XDECREF(newargs); + Py_DECREF(str_spec); return result; } PyErr_SetString(PyExc_TypeError, "__format__ requires str or unicode"); diff --git a/Objects/longobject.c b/Objects/longobject.c index 5922aa5..82a57ec 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -6,7 +6,6 @@ #include "Python.h" #include "longintrepr.h" -#include "formatter_string.h" #include @@ -3415,26 +3414,22 @@ long__format__(PyObject *self, PyObject *args) if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) return NULL; if (PyBytes_Check(format_spec)) - return string_long__format__(self, args); + return _PyLong_FormatAdvanced(self, + PyBytes_AS_STRING(format_spec), + PyBytes_GET_SIZE(format_spec)); if (PyUnicode_Check(format_spec)) { /* Convert format_spec to a str */ - PyObject *result = NULL; - PyObject *newargs = NULL; - PyObject *string_format_spec = NULL; + PyObject *result; + PyObject *str_spec = PyObject_Str(format_spec); - string_format_spec = PyObject_Str(format_spec); - if (string_format_spec == NULL) - goto done; - - newargs = Py_BuildValue("(O)", string_format_spec); - if (newargs == NULL) - goto done; + if (str_spec == NULL) + return NULL; - result = string_long__format__(self, newargs); + result = _PyLong_FormatAdvanced(self, + PyBytes_AS_STRING(str_spec), + PyBytes_GET_SIZE(str_spec)); - done: - Py_XDECREF(string_format_spec); - Py_XDECREF(newargs); + Py_DECREF(str_spec); return result; } PyErr_SetString(PyExc_TypeError, "__format__ requires str or unicode"); diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h index 05ccfea..2bb2ed2 100644 --- a/Objects/stringlib/formatter.h +++ b/Objects/stringlib/formatter.h @@ -102,12 +102,13 @@ typedef struct { if failure, sets the exception */ static int -parse_internal_render_format_spec(PyObject *format_spec, +parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len, InternalFormatSpec *format, char default_type) { - STRINGLIB_CHAR *ptr = STRINGLIB_STR(format_spec); - STRINGLIB_CHAR *end = ptr + STRINGLIB_LEN(format_spec); + STRINGLIB_CHAR *ptr = format_spec; + STRINGLIB_CHAR *end = format_spec + format_spec_len; /* end-ptr is used throughout this code to specify the length of the input string */ @@ -756,56 +757,31 @@ done: /************************************************************************/ /*********** built in formatters ****************************************/ /************************************************************************/ -#ifdef FORMAT_STRING PyObject * -FORMAT_STRING(PyObject* value, PyObject* args) +FORMAT_STRING(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) { - PyObject *format_spec; - PyObject *result = NULL; -#if PY_VERSION_HEX < 0x03000000 - PyObject *tmp = NULL; -#endif InternalFormatSpec format; - - /* If 2.x, we accept either str or unicode, and try to convert it - to the right type. In 3.x, we insist on only unicode */ -#if PY_VERSION_HEX >= 0x03000000 - if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", - &format_spec)) - goto done; -#else - /* If 2.x, convert format_spec to the same type as value */ - /* This is to allow things like u''.format('') */ - if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) - goto done; - if (!(PyBytes_Check(format_spec) || PyUnicode_Check(format_spec))) { - PyErr_Format(PyExc_TypeError, "__format__ arg must be str " - "or unicode, not %s", Py_TYPE(format_spec)->tp_name); - goto done; - } - tmp = STRINGLIB_TOSTR(format_spec); - if (tmp == NULL) - goto done; - format_spec = tmp; -#endif + PyObject *result = NULL; /* check for the special case of zero length format spec, make - it equivalent to str(value) */ - if (STRINGLIB_LEN(format_spec) == 0) { - result = STRINGLIB_TOSTR(value); + it equivalent to str(obj) */ + if (format_spec_len == 0) { + result = STRINGLIB_TOSTR(obj); goto done; } - /* parse the format_spec */ - if (!parse_internal_render_format_spec(format_spec, &format, 's')) + if (!parse_internal_render_format_spec(format_spec, format_spec_len, + &format, 's')) goto done; /* type conversion? */ switch (format.type) { case 's': /* no type conversion needed, already a string. do the formatting */ - result = format_string_internal(value, &format); + result = format_string_internal(obj, &format); break; default: /* unknown */ @@ -826,35 +802,31 @@ FORMAT_STRING(PyObject* value, PyObject* args) } done: -#if PY_VERSION_HEX < 0x03000000 - Py_XDECREF(tmp); -#endif return result; } -#endif /* FORMAT_STRING */ #if defined FORMAT_LONG || defined FORMAT_INT static PyObject* -format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring) +format_int_or_long(PyObject* obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len, + IntOrLongToString tostring) { - PyObject *format_spec; PyObject *result = NULL; PyObject *tmp = NULL; InternalFormatSpec format; - if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", - &format_spec)) - goto done; - /* check for the special case of zero length format spec, make - it equivalent to str(value) */ - if (STRINGLIB_LEN(format_spec) == 0) { - result = STRINGLIB_TOSTR(value); + it equivalent to str(obj) */ + if (format_spec_len == 0) { + result = STRINGLIB_TOSTR(obj); goto done; } /* parse the format_spec */ - if (!parse_internal_render_format_spec(format_spec, &format, 'd')) + if (!parse_internal_render_format_spec(format_spec, + format_spec_len, + &format, 'd')) goto done; /* type conversion? */ @@ -868,7 +840,7 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring) case 'n': /* no type conversion needed, already an int (or long). do the formatting */ - result = format_int_or_long_internal(value, &format, tostring); + result = format_int_or_long_internal(obj, &format, tostring); break; case 'e': @@ -879,10 +851,10 @@ format_int_or_long(PyObject* value, PyObject* args, IntOrLongToString tostring) case 'G': case '%': /* convert to float */ - tmp = PyNumber_Float(value); + tmp = PyNumber_Float(obj); if (tmp == NULL) goto done; - result = format_float_internal(value, &format); + result = format_float_internal(obj, &format); break; default: @@ -917,9 +889,12 @@ long_format(PyObject* value, int base) #endif PyObject * -FORMAT_LONG(PyObject* value, PyObject* args) +FORMAT_LONG(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) { - return format_int_or_long(value, args, long_format); + return format_int_or_long(obj, format_spec, format_spec_len, + long_format); } #endif /* FORMAT_LONG */ @@ -935,32 +910,35 @@ int_format(PyObject* value, int base) } PyObject * -FORMAT_INT(PyObject* value, PyObject* args) +FORMAT_INT(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) { - return format_int_or_long(value, args, int_format); + return format_int_or_long(obj, format_spec, format_spec_len, + int_format); } #endif /* FORMAT_INT */ #ifdef FORMAT_FLOAT PyObject * -FORMAT_FLOAT(PyObject *value, PyObject *args) +FORMAT_FLOAT(PyObject *obj, + STRINGLIB_CHAR *format_spec, + Py_ssize_t format_spec_len) { - PyObject *format_spec; PyObject *result = NULL; InternalFormatSpec format; - if (!PyArg_ParseTuple(args, STRINGLIB_PARSE_CODE ":__format__", &format_spec)) - goto done; - /* check for the special case of zero length format spec, make - it equivalent to str(value) */ - if (STRINGLIB_LEN(format_spec) == 0) { - result = STRINGLIB_TOSTR(value); + it equivalent to str(obj) */ + if (format_spec_len == 0) { + result = STRINGLIB_TOSTR(obj); goto done; } /* parse the format_spec */ - if (!parse_internal_render_format_spec(format_spec, &format, '\0')) + if (!parse_internal_render_format_spec(format_spec, + format_spec_len, + &format, '\0')) goto done; /* type conversion? */ @@ -979,7 +957,7 @@ FORMAT_FLOAT(PyObject *value, PyObject *args) case 'n': case '%': /* no conversion, already a float. do the formatting */ - result = format_float_internal(value, &format); + result = format_float_internal(obj, &format); break; default: diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 81b6a66..3ffe99c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -42,8 +42,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define PY_SSIZE_T_CLEAN #include "Python.h" -#include "formatter_unicode.h" - #include "unicodeobject.h" #include "ucnhash.h" @@ -7863,6 +7861,35 @@ PyDoc_STRVAR(format__doc__, \n\ "); +static PyObject * +unicode__format__(PyObject *self, PyObject *args) +{ + PyObject *format_spec; + PyObject *result = NULL; + PyObject *tmp = NULL; + + /* If 2.x, convert format_spec to the same type as value */ + /* This is to allow things like u''.format('') */ + if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) + goto done; + if (!(PyBytes_Check(format_spec) || PyUnicode_Check(format_spec))) { + PyErr_Format(PyExc_TypeError, "__format__ arg must be str " + "or unicode, not %s", Py_TYPE(format_spec)->tp_name); + goto done; + } + tmp = PyObject_Unicode(format_spec); + if (tmp == NULL) + goto done; + format_spec = tmp; + + result = _PyUnicode_FormatAdvanced(self, + PyUnicode_AS_UNICODE(format_spec), + PyUnicode_GET_SIZE(format_spec)); +done: + Py_XDECREF(tmp); + return result; +} + PyDoc_STRVAR(p_format__doc__, "S.__format__(format_spec) -> unicode\n\ \n\ diff --git a/Python/formatter_string.c b/Python/formatter_string.c index 1041852..f33ad70 100644 --- a/Python/formatter_string.c +++ b/Python/formatter_string.c @@ -4,12 +4,11 @@ of int.__float__, etc., that take and return string objects */ #include "Python.h" -#include "formatter_string.h" - #include "../Objects/stringlib/stringdefs.h" -#define FORMAT_STRING string__format__ -#define FORMAT_LONG string_long__format__ -#define FORMAT_INT string_int__format__ -#define FORMAT_FLOAT string_float__format__ +#define FORMAT_STRING _PyBytes_FormatAdvanced +#define FORMAT_LONG _PyLong_FormatAdvanced +#define FORMAT_INT _PyInt_FormatAdvanced +#define FORMAT_FLOAT _PyFloat_FormatAdvanced + #include "../Objects/stringlib/formatter.h" diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 17c6944..4f2e53f 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -2,12 +2,12 @@ built-in formatter for unicode. That is, unicode.__format__(). */ #include "Python.h" -#include "formatter_unicode.h" - #include "../Objects/stringlib/unicodedefs.h" -#define FORMAT_STRING unicode__format__ +#define FORMAT_STRING _PyUnicode_FormatAdvanced + /* don't define FORMAT_LONG and FORMAT_FLOAT, since we can live with only the string versions of those. The builtin format() will convert them to unicode. */ + #include "../Objects/stringlib/formatter.h" -- cgit v0.12