diff options
author | Victor Stinner <victor.stinner@gmail.com> | 2015-10-09 20:43:24 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@gmail.com> | 2015-10-09 20:43:24 (GMT) |
commit | be75b8cf2310e0be0f32590a1c0df919058002e8 (patch) | |
tree | d722afaec725263f1bf050dd99040de7929d4d61 | |
parent | aecf63ee3a4408e9a6186b08d924d1492234dbea (diff) | |
download | cpython-be75b8cf2310e0be0f32590a1c0df919058002e8.zip cpython-be75b8cf2310e0be0f32590a1c0df919058002e8.tar.gz cpython-be75b8cf2310e0be0f32590a1c0df919058002e8.tar.bz2 |
Issue #25349: Optimize bytes % int
Optimize bytes.__mod__(args) for integere formats: %d (%i, %u), %o, %x and %X.
_PyBytesWriter is now used to format directly the integer into the writer
buffer, instead of using a temporary bytes object.
Formatting is between 30% and 50% faster on a microbenchmark.
-rw-r--r-- | Include/longobject.h | 7 | ||||
-rw-r--r-- | Objects/bytesobject.c | 36 | ||||
-rw-r--r-- | Objects/longobject.c | 120 |
3 files changed, 136 insertions, 27 deletions
diff --git a/Include/longobject.h b/Include/longobject.h index aed59ce..ab92495 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -182,6 +182,13 @@ PyAPI_FUNC(int) _PyLong_FormatWriter( int base, int alternate); +PyAPI_FUNC(char*) _PyLong_FormatBytesWriter( + _PyBytesWriter *writer, + char *str, + PyObject *obj, + int base, + int alternate); + /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter( diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 3aa905c..fd46048 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -841,6 +841,42 @@ _PyBytes_Format(PyObject *format, PyObject *args) case 'o': case 'x': case 'X': + if (PyLong_CheckExact(v) + && width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK)) + && c != 'X') + { + /* Fast path */ + int alternate = flags & F_ALT; + int base; + + switch(c) + { + default: + assert(0 && "'type' not in [diuoxX]"); + case 'd': + case 'i': + case 'u': + base = 10; + break; + case 'o': + base = 8; + break; + case 'x': + case 'X': + base = 16; + break; + } + + /* Fast path */ + writer.min_size -= 2; /* size preallocated by "%d" */ + res = _PyLong_FormatBytesWriter(&writer, res, + v, base, alternate); + if (res == NULL) + goto error; + continue; + } + temp = formatlong(v, flags, prec, c); if (!temp) goto error; diff --git a/Objects/longobject.c b/Objects/longobject.c index 5f22455..00f5d95 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1582,7 +1582,9 @@ divrem1(PyLongObject *a, digit n, digit *prem) static int long_to_decimal_string_internal(PyObject *aa, PyObject **p_output, - _PyUnicodeWriter *writer) + _PyUnicodeWriter *writer, + _PyBytesWriter *bytes_writer, + char **bytes_str) { PyLongObject *scratch, *a; PyObject *str; @@ -1664,6 +1666,13 @@ long_to_decimal_string_internal(PyObject *aa, kind = writer->kind; str = NULL; } + else if (bytes_writer) { + *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, strlen); + if (*bytes_str == NULL) { + Py_DECREF(scratch); + return -1; + } + } else { str = PyUnicode_New(strlen, '9'); if (str == NULL) { @@ -1673,13 +1682,8 @@ long_to_decimal_string_internal(PyObject *aa, kind = PyUnicode_KIND(str); } -#define WRITE_DIGITS(TYPE) \ +#define WRITE_DIGITS(p) \ do { \ - if (writer) \ - p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \ - else \ - p = (TYPE*)PyUnicode_DATA(str) + strlen; \ - \ /* pout[0] through pout[size-2] contribute exactly \ _PyLong_DECIMAL_SHIFT digits each */ \ for (i=0; i < size - 1; i++) { \ @@ -1699,6 +1703,16 @@ long_to_decimal_string_internal(PyObject *aa, /* and sign */ \ if (negative) \ *--p = '-'; \ + } while (0) + +#define WRITE_UNICODE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \ + else \ + p = (TYPE*)PyUnicode_DATA(str) + strlen; \ + \ + WRITE_DIGITS(p); \ \ /* check we've counted correctly */ \ if (writer) \ @@ -1708,25 +1722,34 @@ long_to_decimal_string_internal(PyObject *aa, } while (0) /* fill the string right-to-left */ - if (kind == PyUnicode_1BYTE_KIND) { + if (bytes_writer) { + char *p = *bytes_str + strlen; + WRITE_DIGITS(p); + assert(p == *bytes_str); + } + else if (kind == PyUnicode_1BYTE_KIND) { Py_UCS1 *p; - WRITE_DIGITS(Py_UCS1); + WRITE_UNICODE_DIGITS(Py_UCS1); } else if (kind == PyUnicode_2BYTE_KIND) { Py_UCS2 *p; - WRITE_DIGITS(Py_UCS2); + WRITE_UNICODE_DIGITS(Py_UCS2); } else { Py_UCS4 *p; assert (kind == PyUnicode_4BYTE_KIND); - WRITE_DIGITS(Py_UCS4); + WRITE_UNICODE_DIGITS(Py_UCS4); } #undef WRITE_DIGITS +#undef WRITE_UNICODE_DIGITS Py_DECREF(scratch); if (writer) { writer->pos += strlen; } + else if (bytes_writer) { + (*bytes_str) += strlen; + } else { assert(_PyUnicode_CheckConsistency(str, 1)); *p_output = (PyObject *)str; @@ -1738,7 +1761,7 @@ static PyObject * long_to_decimal_string(PyObject *aa) { PyObject *v; - if (long_to_decimal_string_internal(aa, &v, NULL) == -1) + if (long_to_decimal_string_internal(aa, &v, NULL, NULL, NULL) == -1) return NULL; return v; } @@ -1750,7 +1773,8 @@ long_to_decimal_string(PyObject *aa) static int long_format_binary(PyObject *aa, int base, int alternate, - PyObject **p_output, _PyUnicodeWriter *writer) + PyObject **p_output, _PyUnicodeWriter *writer, + _PyBytesWriter *bytes_writer, char **bytes_str) { PyLongObject *a = (PyLongObject *)aa; PyObject *v; @@ -1812,6 +1836,11 @@ long_format_binary(PyObject *aa, int base, int alternate, kind = writer->kind; v = NULL; } + else if (writer) { + *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, sz); + if (*bytes_str == NULL) + return -1; + } else { v = PyUnicode_New(sz, 'x'); if (v == NULL) @@ -1819,13 +1848,8 @@ long_format_binary(PyObject *aa, int base, int alternate, kind = PyUnicode_KIND(v); } -#define WRITE_DIGITS(TYPE) \ +#define WRITE_DIGITS(p) \ do { \ - if (writer) \ - p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \ - else \ - p = (TYPE*)PyUnicode_DATA(v) + sz; \ - \ if (size_a == 0) { \ *--p = '0'; \ } \ @@ -1860,30 +1884,50 @@ long_format_binary(PyObject *aa, int base, int alternate, } \ if (negative) \ *--p = '-'; \ + } while (0) + +#define WRITE_UNICODE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \ + else \ + p = (TYPE*)PyUnicode_DATA(v) + sz; \ + \ + WRITE_DIGITS(p); \ + \ if (writer) \ assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ else \ assert(p == (TYPE*)PyUnicode_DATA(v)); \ } while (0) - if (kind == PyUnicode_1BYTE_KIND) { + if (bytes_writer) { + char *p = *bytes_str + sz; + WRITE_DIGITS(p); + assert(p == *bytes_str); + } + else if (kind == PyUnicode_1BYTE_KIND) { Py_UCS1 *p; - WRITE_DIGITS(Py_UCS1); + WRITE_UNICODE_DIGITS(Py_UCS1); } else if (kind == PyUnicode_2BYTE_KIND) { Py_UCS2 *p; - WRITE_DIGITS(Py_UCS2); + WRITE_UNICODE_DIGITS(Py_UCS2); } else { Py_UCS4 *p; assert (kind == PyUnicode_4BYTE_KIND); - WRITE_DIGITS(Py_UCS4); + WRITE_UNICODE_DIGITS(Py_UCS4); } #undef WRITE_DIGITS +#undef WRITE_UNICODE_DIGITS if (writer) { writer->pos += sz; } + else if (bytes_writer) { + (*bytes_str) += sz; + } else { assert(_PyUnicode_CheckConsistency(v, 1)); *p_output = v; @@ -1897,9 +1941,9 @@ _PyLong_Format(PyObject *obj, int base) PyObject *str; int err; if (base == 10) - err = long_to_decimal_string_internal(obj, &str, NULL); + err = long_to_decimal_string_internal(obj, &str, NULL, NULL, NULL); else - err = long_format_binary(obj, base, 1, &str, NULL); + err = long_format_binary(obj, base, 1, &str, NULL, NULL, NULL); if (err == -1) return NULL; return str; @@ -1911,9 +1955,31 @@ _PyLong_FormatWriter(_PyUnicodeWriter *writer, int base, int alternate) { if (base == 10) - return long_to_decimal_string_internal(obj, NULL, writer); + return long_to_decimal_string_internal(obj, NULL, writer, + NULL, NULL); else - return long_format_binary(obj, base, alternate, NULL, writer); + return long_format_binary(obj, base, alternate, NULL, writer, + NULL, NULL); +} + +char* +_PyLong_FormatBytesWriter(_PyBytesWriter *writer, char *str, + PyObject *obj, + int base, int alternate) +{ + char *str2; + int res; + str2 = str; + if (base == 10) + res = long_to_decimal_string_internal(obj, NULL, NULL, + writer, &str2); + else + res = long_format_binary(obj, base, alternate, NULL, NULL, + writer, &str2); + if (res < 0) + return NULL; + assert(str2 != NULL); + return str2; } /* Table of digit values for 8-bit string -> integer conversion. |