diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2012-10-01 22:33:47 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2012-10-01 22:33:47 (GMT) |
commit | 621ef3d84f842a10dc9cb2af5ab9555b1663b79e (patch) | |
tree | 8f7553021772f75f5abb239eeee36246aabf6319 | |
parent | fd0d3e5d25cf9dcb751a329cf390388e0dbd8da2 (diff) | |
download | cpython-621ef3d84f842a10dc9cb2af5ab9555b1663b79e.zip cpython-621ef3d84f842a10dc9cb2af5ab9555b1663b79e.tar.gz cpython-621ef3d84f842a10dc9cb2af5ab9555b1663b79e.tar.bz2 |
Issue #15609: Optimize str%args for integer argument
- Use _PyLong_FormatWriter() instead of formatlong() when possible, to avoid
a temporary buffer
- Enable the fast path when the width is smaller than or equal to the length,
and when the precision is greater than or equal to the length
- Add unit tests!
- formatlong() uses PyUnicode_Resize() instead of _PyUnicode_FromASCII()
to resize the output string
-rw-r--r-- | Lib/test/test_format.py | 16 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 168 | ||||
-rw-r--r-- | Python/formatter_unicode.c | 3 |
3 files changed, 119 insertions, 68 deletions
diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index b6e2540..e6b0d20 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -307,6 +307,22 @@ class FormatTest(unittest.TestCase): finally: locale.setlocale(locale.LC_ALL, oldloc) + @support.cpython_only + def test_optimisations(self): + text = "abcde" # 5 characters + + self.assertIs("%s" % text, text) + self.assertIs("%.5s" % text, text) + self.assertIs("%.10s" % text, text) + self.assertIs("%1s" % text, text) + self.assertIs("%5s" % text, text) + + self.assertIs("{0}".format(text), text) + self.assertIs("{0:s}".format(text), text) + self.assertIs("{0:.5s}".format(text), text) + self.assertIs("{0:.10s}".format(text), text) + self.assertIs("{0:1s}".format(text), text) + self.assertIs("{0:5s}".format(text), text) def test_main(): diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 0da565a..606aa33 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13288,7 +13288,10 @@ formatlong(PyObject *val, int flags, int prec, int type) assert(PyLong_Check(val)); switch (type) { + default: + assert(!"'type' not in [diuoxX]"); case 'd': + case 'i': case 'u': /* Special-case boolean: we want 0/1 */ if (PyBool_Check(val)) @@ -13305,8 +13308,6 @@ formatlong(PyObject *val, int flags, int prec, int type) numnondigits = 2; result = PyNumber_ToBase(val, 16); break; - default: - assert(!"'type' not in [duoxX]"); } if (!result) return NULL; @@ -13379,15 +13380,94 @@ formatlong(PyObject *val, int flags, int prec, int type) if (buf[i] >= 'a' && buf[i] <= 'x') buf[i] -= 'a'-'A'; } - if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) { + if (!PyUnicode_Check(result) + || buf != PyUnicode_DATA(result)) { PyObject *unicode; unicode = _PyUnicode_FromASCII(buf, len); Py_DECREF(result); result = unicode; } + else if (len != PyUnicode_GET_LENGTH(result)) { + if (PyUnicode_Resize(&result, len) < 0) + Py_CLEAR(result); + } return result; } +/* Format an integer. 
+ * Return 1 if the number has been formatted into the writer, + * 0 if the number has been formatted into *p_result + * -1 and raise an exception on error */ +static int +mainformatlong(_PyUnicodeWriter *writer, PyObject *v, + int c, Py_ssize_t width, int prec, int flags, + PyObject **p_result) +{ + PyObject *iobj, *res; + + if (!PyNumber_Check(v)) + goto wrongtype; + + if (!PyLong_Check(v)) { + iobj = PyNumber_Long(v); + if (iobj == NULL) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + goto wrongtype; + return -1; + } + assert(PyLong_Check(iobj)); + } + else { + iobj = v; + Py_INCREF(iobj); + } + + if (PyLong_CheckExact(v) + && width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK)) + && c != 'X') + { + /* Fast path */ + int alternate = flags & F_ALT; + int base; + + switch(c) + { + default: + assert(0 && "'type' not in [diuoxX]"); + case 'd': + case 'i': + case 'u': + base = 10; + break; + case 'o': + base = 8; + break; + case 'x': + case 'X': + base = 16; + break; + } + + if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) + return -1; + return 1; + } + + res = formatlong(iobj, flags, prec, c); + Py_DECREF(iobj); + if (res == NULL) + return -1; + *p_result = res; + return 0; + +wrongtype: + PyErr_Format(PyExc_TypeError, + "%%%c format: a number is required, " + "not %.200s", (char)c, Py_TYPE(v)->tp_name); + return -1; +} + static Py_UCS4 formatchar(PyObject *v) { @@ -13493,7 +13573,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) Py_UCS4 fill; int sign; Py_UCS4 signchar; - int isnumok; PyObject *v = NULL; void *pbuf = NULL; Py_ssize_t pindex, len; @@ -13692,64 +13771,18 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 'o': case 'x': case 'X': - if (PyLong_CheckExact(v) - && width == -1 && prec == -1 - && !(flags & (F_SIGN | F_BLANK))) - { - /* Fast path */ - switch(c) - { - case 'd': - case 'i': - case 'u': - if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) - goto onError; - goto nextarg; - case 'x': - if 
(_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1) - goto onError; - goto nextarg; - case 'o': - if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1) - goto onError; - goto nextarg; - default: - break; - } - } - - isnumok = 0; - if (PyNumber_Check(v)) { - PyObject *iobj=NULL; - - if (PyLong_Check(v)) { - iobj = v; - Py_INCREF(iobj); - } - else { - iobj = PyNumber_Long(v); - } - if (iobj!=NULL) { - if (PyLong_Check(iobj)) { - isnumok = 1; - sign = 1; - temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c)); - Py_DECREF(iobj); - } - else { - Py_DECREF(iobj); - } - } - } - if (!isnumok) { - PyErr_Format(PyExc_TypeError, - "%%%c format: a number is required, " - "not %.200s", (char)c, Py_TYPE(v)->tp_name); + { + int ret = mainformatlong(&writer, v, c, width, prec, + flags, &temp); + if (ret == 1) + goto nextarg; + if (ret == -1) goto onError; - } + sign = 1; if (flags & F_ZERO) fill = '0'; break; + } case 'e': case 'E': @@ -13803,7 +13836,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) goto onError; assert (PyUnicode_Check(temp)); - if (width == -1 && prec == -1 + if (PyUnicode_READY(temp) == -1) { + Py_CLEAR(temp); + goto onError; + } + + len = PyUnicode_GET_LENGTH(temp); + if ((width == -1 || width <= len) + && (prec == -1 || prec >= len) && !(flags & (F_SIGN | F_BLANK))) { /* Fast path */ @@ -13812,20 +13852,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) goto nextarg; } - if (PyUnicode_READY(temp) == -1) { - Py_CLEAR(temp); - goto onError; - } - kind = PyUnicode_KIND(temp); - pbuf = PyUnicode_DATA(temp); - len = PyUnicode_GET_LENGTH(temp); - if (c == 's' || c == 'r' || c == 'a') { if (prec >= 0 && len > prec) len = prec; } /* pbuf is initialized here. 
*/ + kind = PyUnicode_KIND(temp); + pbuf = PyUnicode_DATA(temp); pindex = 0; if (sign) { Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex); diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index aa62502..0ce9862 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -757,7 +757,8 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format, goto done; } - if (format->width == -1 && format->precision == -1) { + if ((format->width == -1 || format->width <= len) + && (format->precision == -1 || format->precision >= len)) { /* Fast path */ return _PyUnicodeWriter_WriteStr(writer, value); } |