summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-10-01 22:33:47 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2012-10-01 22:33:47 (GMT)
commit621ef3d84f842a10dc9cb2af5ab9555b1663b79e (patch)
tree8f7553021772f75f5abb239eeee36246aabf6319
parentfd0d3e5d25cf9dcb751a329cf390388e0dbd8da2 (diff)
downloadcpython-621ef3d84f842a10dc9cb2af5ab9555b1663b79e.zip
cpython-621ef3d84f842a10dc9cb2af5ab9555b1663b79e.tar.gz
cpython-621ef3d84f842a10dc9cb2af5ab9555b1663b79e.tar.bz2
Issue #15609: Optimize str%args for integer argument
- Use _PyLong_FormatWriter() instead of formatlong() when possible, to avoid a temporary buffer
- Enable the fast path when the width is smaller than or equal to the length, and when the precision is greater than or equal to the length
- Add unit tests!
- formatlong() uses PyUnicode_Resize() instead of _PyUnicode_FromASCII() to resize the output string
-rw-r--r--Lib/test/test_format.py16
-rw-r--r--Objects/unicodeobject.c168
-rw-r--r--Python/formatter_unicode.c3
3 files changed, 119 insertions, 68 deletions
diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
index b6e2540..e6b0d20 100644
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -307,6 +307,22 @@ class FormatTest(unittest.TestCase):
finally:
locale.setlocale(locale.LC_ALL, oldloc)
+ @support.cpython_only
+ def test_optimisations(self):
+ # Check that formatting a str returns the very same object (assertIs)
+ # when the requested width does not exceed its length and the
+ # requested precision does not truncate it.
+ text = "abcde" # 5 characters
+
+ # %-formatting
+ self.assertIs("%s" % text, text)
+ self.assertIs("%.5s" % text, text)
+ self.assertIs("%.10s" % text, text)
+ self.assertIs("%1s" % text, text)
+ self.assertIs("%5s" % text, text)
+
+ # str.format()
+ self.assertIs("{0}".format(text), text)
+ self.assertIs("{0:s}".format(text), text)
+ self.assertIs("{0:.5s}".format(text), text)
+ self.assertIs("{0:.10s}".format(text), text)
+ self.assertIs("{0:1s}".format(text), text)
+ self.assertIs("{0:5s}".format(text), text)
def test_main():
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0da565a..606aa33 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -13288,7 +13288,10 @@ formatlong(PyObject *val, int flags, int prec, int type)
assert(PyLong_Check(val));
switch (type) {
+ default:
+ assert(!"'type' not in [diuoxX]");
case 'd':
+ case 'i':
case 'u':
/* Special-case boolean: we want 0/1 */
if (PyBool_Check(val))
@@ -13305,8 +13308,6 @@ formatlong(PyObject *val, int flags, int prec, int type)
numnondigits = 2;
result = PyNumber_ToBase(val, 16);
break;
- default:
- assert(!"'type' not in [duoxX]");
}
if (!result)
return NULL;
@@ -13379,15 +13380,94 @@ formatlong(PyObject *val, int flags, int prec, int type)
if (buf[i] >= 'a' && buf[i] <= 'x')
buf[i] -= 'a'-'A';
}
- if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
+ if (!PyUnicode_Check(result)
+ || buf != PyUnicode_DATA(result)) {
PyObject *unicode;
unicode = _PyUnicode_FromASCII(buf, len);
Py_DECREF(result);
result = unicode;
}
+ else if (len != PyUnicode_GET_LENGTH(result)) {
+ if (PyUnicode_Resize(&result, len) < 0)
+ Py_CLEAR(result);
+ }
return result;
}
+/* Format an integer for the 'd', 'i', 'u', 'o', 'x' and 'X' conversions.
+ *
+ * writer:   output writer, used directly by the fast path
+ * v:        object to format; converted with PyNumber_Long() if it is
+ *           not already an int
+ * c:        conversion character
+ * width:    requested field width, -1 if not specified
+ * prec:     requested precision, -1 if not specified
+ * flags:    F_* formatting flags
+ * p_result: receives the string built by formatlong() on the slow path
+ *
+ * Return 1 if the number has been formatted into the writer,
+ * 0 if the number has been formatted into *p_result
+ * -1 and raise an exception on error */
+static int
+mainformatlong(_PyUnicodeWriter *writer, PyObject *v,
+               int c, Py_ssize_t width, int prec, int flags,
+               PyObject **p_result)
+{
+    PyObject *iobj, *res;
+
+    if (!PyNumber_Check(v))
+        goto wrongtype;
+
+    /* From here on, iobj is an owned reference that must be released
+       on every exit path. */
+    if (!PyLong_Check(v)) {
+        iobj = PyNumber_Long(v);
+        if (iobj == NULL) {
+            /* A TypeError from the conversion is replaced by the more
+               specific "a number is required" message below. */
+            if (PyErr_ExceptionMatches(PyExc_TypeError))
+                goto wrongtype;
+            return -1;
+        }
+        assert(PyLong_Check(iobj));
+    }
+    else {
+        iobj = v;
+        Py_INCREF(iobj);
+    }
+
+    if (PyLong_CheckExact(v)
+        && width == -1 && prec == -1
+        && !(flags & (F_SIGN | F_BLANK))
+        && c != 'X')
+    {
+        /* Fast path: write the digits straight into the writer, without
+           a temporary string. 'X' is excluded and left to formatlong(). */
+        int alternate = flags & F_ALT;
+        int base;
+
+        switch(c)
+        {
+            default:
+                assert(0 && "'type' not in [diuoxX]");
+                /* fall through */
+            case 'd':
+            case 'i':
+            case 'u':
+                base = 10;
+                break;
+            case 'o':
+                base = 8;
+                break;
+            case 'x':
+            case 'X':
+                base = 16;
+                break;
+        }
+
+        if (_PyLong_FormatWriter(writer, v, base, alternate) == -1) {
+            /* Release the reference taken above; it used to leak here. */
+            Py_DECREF(iobj);
+            return -1;
+        }
+        /* Release the reference taken above; it used to leak here. */
+        Py_DECREF(iobj);
+        return 1;
+    }
+
+    res = formatlong(iobj, flags, prec, c);
+    Py_DECREF(iobj);
+    if (res == NULL)
+        return -1;
+    *p_result = res;
+    return 0;
+
+wrongtype:
+    /* No reference to iobj is held when control reaches this label. */
+    PyErr_Format(PyExc_TypeError,
+            "%%%c format: a number is required, "
+            "not %.200s", (char)c, Py_TYPE(v)->tp_name);
+    return -1;
+}
+
static Py_UCS4
formatchar(PyObject *v)
{
@@ -13493,7 +13573,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
Py_UCS4 fill;
int sign;
Py_UCS4 signchar;
- int isnumok;
PyObject *v = NULL;
void *pbuf = NULL;
Py_ssize_t pindex, len;
@@ -13692,64 +13771,18 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 'o':
case 'x':
case 'X':
- if (PyLong_CheckExact(v)
- && width == -1 && prec == -1
- && !(flags & (F_SIGN | F_BLANK)))
- {
- /* Fast path */
- switch(c)
- {
- case 'd':
- case 'i':
- case 'u':
- if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
- goto onError;
- goto nextarg;
- case 'x':
- if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
- goto onError;
- goto nextarg;
- case 'o':
- if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
- goto onError;
- goto nextarg;
- default:
- break;
- }
- }
-
- isnumok = 0;
- if (PyNumber_Check(v)) {
- PyObject *iobj=NULL;
-
- if (PyLong_Check(v)) {
- iobj = v;
- Py_INCREF(iobj);
- }
- else {
- iobj = PyNumber_Long(v);
- }
- if (iobj!=NULL) {
- if (PyLong_Check(iobj)) {
- isnumok = 1;
- sign = 1;
- temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c));
- Py_DECREF(iobj);
- }
- else {
- Py_DECREF(iobj);
- }
- }
- }
- if (!isnumok) {
- PyErr_Format(PyExc_TypeError,
- "%%%c format: a number is required, "
- "not %.200s", (char)c, Py_TYPE(v)->tp_name);
+ {
+ int ret = mainformatlong(&writer, v, c, width, prec,
+ flags, &temp);
+ if (ret == 1)
+ goto nextarg;
+ if (ret == -1)
goto onError;
- }
+ sign = 1;
if (flags & F_ZERO)
fill = '0';
break;
+ }
case 'e':
case 'E':
@@ -13803,7 +13836,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
goto onError;
assert (PyUnicode_Check(temp));
- if (width == -1 && prec == -1
+ if (PyUnicode_READY(temp) == -1) {
+ Py_CLEAR(temp);
+ goto onError;
+ }
+
+ len = PyUnicode_GET_LENGTH(temp);
+ if ((width == -1 || width <= len)
+ && (prec == -1 || prec >= len)
&& !(flags & (F_SIGN | F_BLANK)))
{
/* Fast path */
@@ -13812,20 +13852,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
goto nextarg;
}
- if (PyUnicode_READY(temp) == -1) {
- Py_CLEAR(temp);
- goto onError;
- }
- kind = PyUnicode_KIND(temp);
- pbuf = PyUnicode_DATA(temp);
- len = PyUnicode_GET_LENGTH(temp);
-
if (c == 's' || c == 'r' || c == 'a') {
if (prec >= 0 && len > prec)
len = prec;
}
/* pbuf is initialized here. */
+ kind = PyUnicode_KIND(temp);
+ pbuf = PyUnicode_DATA(temp);
pindex = 0;
if (sign) {
Py_UCS4 ch = PyUnicode_READ(kind, pbuf, pindex);
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index aa62502..0ce9862 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -757,7 +757,8 @@ format_string_internal(PyObject *value, const InternalFormatSpec *format,
goto done;
}
- if (format->width == -1 && format->precision == -1) {
+ if ((format->width == -1 || format->width <= len)
+ && (format->precision == -1 || format->precision >= len)) {
/* Fast path */
return _PyUnicodeWriter_WriteStr(writer, value);
}