summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@gmail.com> 2012-05-29 10:57:52 (GMT)
committer: Victor Stinner <victor.stinner@gmail.com> 2012-05-29 10:57:52 (GMT)
commit: d3f0882dfb3a15d604de1b1620b2bf8de9d643bb (patch)
tree: 16c78bd58f57ffce487f71bb075372d72cfdcbde /Objects
parent: a1b0c9fc4d68cd4e1103456d0cedf2ef3bbbfe9a (diff)
download: cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.zip
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.gz
cpython-d3f0882dfb3a15d604de1b1620b2bf8de9d643bb.tar.bz2
Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)
* Formatting of str, int, float and complex now uses the _PyUnicodeWriter API, which avoids a temporary buffer in most cases.
* Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just keep a reference to the string if the output is composed of only one string.
* Disable overallocation when formatting the last argument of str%args and str.format(args).
* Overallocation allocates at least 100 characters: add a min_length attribute to the _PyUnicodeWriter structure.
* Add new private functions: _PyUnicode_FastCopyCharacters(), _PyUnicode_FastFill() and _PyUnicode_FromASCII().

The speedup is around 20% on average.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/complexobject.c 17
-rw-r--r-- Objects/floatobject.c 27
-rw-r--r-- Objects/longobject.c 307
-rw-r--r-- Objects/stringlib/asciilib.h 2
-rw-r--r-- Objects/stringlib/unicode_format.h 46
-rw-r--r-- Objects/unicodeobject.c 362
6 files changed, 540 insertions, 221 deletions
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index b73dc4b..403c60c 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -699,11 +699,22 @@ static PyObject *
complex__format__(PyObject* self, PyObject* args)
{
PyObject *format_spec;
+ _PyUnicodeWriter writer;
+ int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
- return NULL;
- return _PyComplex_FormatAdvanced(self, format_spec, 0,
- PyUnicode_GET_LENGTH(format_spec));
+ return NULL;
+
+ _PyUnicodeWriter_Init(&writer, 0);
+ ret = _PyComplex_FormatAdvancedWriter(
+ &writer,
+ self,
+ format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
+ if (ret == -1) {
+ _PyUnicodeWriter_Dealloc(&writer);
+ return NULL;
+ }
+ return _PyUnicodeWriter_Finish(&writer);
}
#if 0
diff --git a/Objects/floatobject.c b/Objects/floatobject.c
index 07d31b2..3c742c3 100644
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -267,13 +267,15 @@ static PyObject *
float_repr(PyFloatObject *v)
{
PyObject *result;
- char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
- 'r', 0,
- Py_DTSF_ADD_DOT_0,
- NULL);
+ char *buf;
+
+ buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
+ 'r', 0,
+ Py_DTSF_ADD_DOT_0,
+ NULL);
if (!buf)
return PyErr_NoMemory();
- result = PyUnicode_FromString(buf);
+ result = _PyUnicode_FromASCII(buf, strlen(buf));
PyMem_Free(buf);
return result;
}
@@ -1703,11 +1705,22 @@ static PyObject *
float__format__(PyObject *self, PyObject *args)
{
PyObject *format_spec;
+ _PyUnicodeWriter writer;
+ int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
- return _PyFloat_FormatAdvanced(self, format_spec, 0,
- PyUnicode_GET_LENGTH(format_spec));
+
+ _PyUnicodeWriter_Init(&writer, 0);
+ ret = _PyFloat_FormatAdvancedWriter(
+ &writer,
+ self,
+ format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
+ if (ret == -1) {
+ _PyUnicodeWriter_Dealloc(&writer);
+ return NULL;
+ }
+ return _PyUnicodeWriter_Finish(&writer);
}
PyDoc_STRVAR(float__format__doc,
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 74c59c7..1369dac 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1550,20 +1550,22 @@ divrem1(PyLongObject *a, digit n, digit *prem)
string. (Return value is non-shared so that callers can modify the
returned value if necessary.) */
-static PyObject *
-long_to_decimal_string(PyObject *aa)
+static int
+long_to_decimal_string_internal(PyObject *aa,
+ PyObject **p_output,
+ _PyUnicodeWriter *writer)
{
PyLongObject *scratch, *a;
PyObject *str;
Py_ssize_t size, strlen, size_a, i, j;
digit *pout, *pin, rem, tenpow;
- unsigned char *p;
int negative;
+ enum PyUnicode_Kind kind;
a = (PyLongObject *)aa;
if (a == NULL || !PyLong_Check(a)) {
PyErr_BadInternalCall();
- return NULL;
+ return -1;
}
size_a = ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0;
@@ -1580,13 +1582,13 @@ long_to_decimal_string(PyObject *aa)
if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) {
PyErr_SetString(PyExc_OverflowError,
"long is too large to format");
- return NULL;
+ return -1;
}
/* the expression size_a * PyLong_SHIFT is now safe from overflow */
size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT);
scratch = _PyLong_New(size);
if (scratch == NULL)
- return NULL;
+ return -1;
/* convert array of base _PyLong_BASE digits in pin to an array of
base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP,
@@ -1609,7 +1611,7 @@ long_to_decimal_string(PyObject *aa)
/* check for keyboard interrupt */
SIGCHECK({
Py_DECREF(scratch);
- return NULL;
+ return -1;
});
}
/* pout should have at least one digit, so that the case when a = 0
@@ -1625,65 +1627,113 @@ long_to_decimal_string(PyObject *aa)
tenpow *= 10;
strlen++;
}
- str = PyUnicode_New(strlen, '9');
- if (str == NULL) {
- Py_DECREF(scratch);
- return NULL;
+ if (writer) {
+ if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1)
+ return -1;
+ kind = writer->kind;
+ str = NULL;
}
+ else {
+ str = PyUnicode_New(strlen, '9');
+ if (str == NULL) {
+ Py_DECREF(scratch);
+ return -1;
+ }
+ kind = PyUnicode_KIND(str);
+ }
+
+#define WRITE_DIGITS(TYPE) \
+ do { \
+ if (writer) \
+ p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
+ else \
+ p = (TYPE*)PyUnicode_DATA(str) + strlen; \
+ \
+ *p = '\0'; \
+ /* pout[0] through pout[size-2] contribute exactly \
+ _PyLong_DECIMAL_SHIFT digits each */ \
+ for (i=0; i < size - 1; i++) { \
+ rem = pout[i]; \
+ for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { \
+ *--p = '0' + rem % 10; \
+ rem /= 10; \
+ } \
+ } \
+ /* pout[size-1]: always produce at least one decimal digit */ \
+ rem = pout[i]; \
+ do { \
+ *--p = '0' + rem % 10; \
+ rem /= 10; \
+ } while (rem != 0); \
+ \
+ /* and sign */ \
+ if (negative) \
+ *--p = '-'; \
+ \
+ /* check we've counted correctly */ \
+ if (writer) \
+ assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
+ else \
+ assert(p == (TYPE*)PyUnicode_DATA(str)); \
+ } while (0)
/* fill the string right-to-left */
- assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND);
- p = PyUnicode_1BYTE_DATA(str) + strlen;
- *p = '\0';
- /* pout[0] through pout[size-2] contribute exactly
- _PyLong_DECIMAL_SHIFT digits each */
- for (i=0; i < size - 1; i++) {
- rem = pout[i];
- for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) {
- *--p = '0' + rem % 10;
- rem /= 10;
- }
+ if (kind == PyUnicode_1BYTE_KIND) {
+ Py_UCS1 *p;
+ WRITE_DIGITS(Py_UCS1);
}
- /* pout[size-1]: always produce at least one decimal digit */
- rem = pout[i];
- do {
- *--p = '0' + rem % 10;
- rem /= 10;
- } while (rem != 0);
-
- /* and sign */
- if (negative)
- *--p = '-';
+ else if (kind == PyUnicode_2BYTE_KIND) {
+ Py_UCS2 *p;
+ WRITE_DIGITS(Py_UCS2);
+ }
+ else {
+ assert (kind == PyUnicode_4BYTE_KIND);
+ Py_UCS4 *p;
+ WRITE_DIGITS(Py_UCS4);
+ }
+#undef WRITE_DIGITS
- /* check we've counted correctly */
- assert(p == PyUnicode_1BYTE_DATA(str));
- assert(_PyUnicode_CheckConsistency(str, 1));
Py_DECREF(scratch);
- return (PyObject *)str;
+ if (writer) {
+ writer->pos += strlen;
+ }
+ else {
+ assert(_PyUnicode_CheckConsistency(str, 1));
+ *p_output = (PyObject *)str;
+ }
+ return 0;
+}
+
+static PyObject *
+long_to_decimal_string(PyObject *aa)
+{
+ PyObject *v;
+ if (long_to_decimal_string_internal(aa, &v, NULL) == -1)
+ return NULL;
+ return v;
}
/* Convert a long int object to a string, using a given conversion base,
- which should be one of 2, 8, 10 or 16. Return a string object.
- If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */
+ which should be one of 2, 8 or 16. Return a string object.
+ If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'
+ if alternate is nonzero. */
-PyObject *
-_PyLong_Format(PyObject *aa, int base)
+static int
+long_format_binary(PyObject *aa, int base, int alternate,
+ PyObject **p_output, _PyUnicodeWriter *writer)
{
register PyLongObject *a = (PyLongObject *)aa;
PyObject *v;
Py_ssize_t sz;
Py_ssize_t size_a;
- Py_UCS1 *p;
+ enum PyUnicode_Kind kind;
int negative;
int bits;
- assert(base == 2 || base == 8 || base == 10 || base == 16);
- if (base == 10)
- return long_to_decimal_string((PyObject *)a);
-
+ assert(base == 2 || base == 8 || base == 16);
if (a == NULL || !PyLong_Check(a)) {
PyErr_BadInternalCall();
- return NULL;
+ return -1;
}
size_a = ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0;
@@ -1706,7 +1756,7 @@ _PyLong_Format(PyObject *aa, int base)
/* Compute exact length 'sz' of output string. */
if (size_a == 0) {
- sz = 3;
+ sz = 1;
}
else {
Py_ssize_t size_a_in_bits;
@@ -1714,56 +1764,126 @@ _PyLong_Format(PyObject *aa, int base)
if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) {
PyErr_SetString(PyExc_OverflowError,
"int is too large to format");
- return NULL;
+ return -1;
}
size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
bits_in_digit(a->ob_digit[size_a - 1]);
- /* Allow 2 characters for prefix and 1 for a '-' sign. */
- sz = 2 + negative + (size_a_in_bits + (bits - 1)) / bits;
+ /* Allow 1 character for a '-' sign. */
+ sz = negative + (size_a_in_bits + (bits - 1)) / bits;
+ }
+ if (alternate) {
+ /* 2 characters for prefix */
+ sz += 2;
}
- v = PyUnicode_New(sz, 'x');
- if (v == NULL) {
- return NULL;
+ if (writer) {
+ if (_PyUnicodeWriter_Prepare(writer, sz, 'x') == -1)
+ return -1;
+ kind = writer->kind;
+ v = NULL;
+ }
+ else {
+ v = PyUnicode_New(sz, 'x');
+ if (v == NULL)
+ return -1;
+ kind = PyUnicode_KIND(v);
+ }
+
+#define WRITE_DIGITS(TYPE) \
+ do { \
+ if (writer) \
+ p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
+ else \
+ p = (TYPE*)PyUnicode_DATA(v) + sz; \
+ \
+ if (size_a == 0) { \
+ *--p = '0'; \
+ } \
+ else { \
+ /* JRH: special case for power-of-2 bases */ \
+ twodigits accum = 0; \
+ int accumbits = 0; /* # of bits in accum */ \
+ Py_ssize_t i; \
+ for (i = 0; i < size_a; ++i) { \
+ accum |= (twodigits)a->ob_digit[i] << accumbits; \
+ accumbits += PyLong_SHIFT; \
+ assert(accumbits >= bits); \
+ do { \
+ char cdigit; \
+ cdigit = (char)(accum & (base - 1)); \
+ cdigit += (cdigit < 10) ? '0' : 'a'-10; \
+ *--p = cdigit; \
+ accumbits -= bits; \
+ accum >>= bits; \
+ } while (i < size_a-1 ? accumbits >= bits : accum > 0); \
+ } \
+ } \
+ \
+ if (alternate) { \
+ if (base == 16) \
+ *--p = 'x'; \
+ else if (base == 8) \
+ *--p = 'o'; \
+ else /* (base == 2) */ \
+ *--p = 'b'; \
+ *--p = '0'; \
+ } \
+ if (negative) \
+ *--p = '-'; \
+ if (writer) \
+ assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
+ else \
+ assert(p == (TYPE*)PyUnicode_DATA(v)); \
+ } while (0)
+
+ if (kind == PyUnicode_1BYTE_KIND) {
+ Py_UCS1 *p;
+ WRITE_DIGITS(Py_UCS1);
+ }
+ else if (kind == PyUnicode_2BYTE_KIND) {
+ Py_UCS2 *p;
+ WRITE_DIGITS(Py_UCS2);
}
- assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
+ else {
+ assert (kind == PyUnicode_4BYTE_KIND);
+ Py_UCS4 *p;
+ WRITE_DIGITS(Py_UCS4);
+ }
+#undef WRITE_DIGITS
- p = PyUnicode_1BYTE_DATA(v) + sz;
- if (size_a == 0) {
- *--p = '0';
+ if (writer) {
+ writer->pos += sz;
}
else {
- /* JRH: special case for power-of-2 bases */
- twodigits accum = 0;
- int accumbits = 0; /* # of bits in accum */
- Py_ssize_t i;
- for (i = 0; i < size_a; ++i) {
- accum |= (twodigits)a->ob_digit[i] << accumbits;
- accumbits += PyLong_SHIFT;
- assert(accumbits >= bits);
- do {
- char cdigit;
- cdigit = (char)(accum & (base - 1));
- cdigit += (cdigit < 10) ? '0' : 'a'-10;
- *--p = cdigit;
- accumbits -= bits;
- accum >>= bits;
- } while (i < size_a-1 ? accumbits >= bits : accum > 0);
- }
+ assert(_PyUnicode_CheckConsistency(v, 1));
+ *p_output = v;
}
+ return 0;
+}
- if (base == 16)
- *--p = 'x';
- else if (base == 8)
- *--p = 'o';
- else /* (base == 2) */
- *--p = 'b';
- *--p = '0';
- if (negative)
- *--p = '-';
- assert(p == PyUnicode_1BYTE_DATA(v));
- assert(_PyUnicode_CheckConsistency(v, 1));
- return v;
+PyObject *
+_PyLong_Format(PyObject *obj, int base)
+{
+ PyObject *str;
+ int err;
+ if (base == 10)
+ err = long_to_decimal_string_internal(obj, &str, NULL);
+ else
+ err = long_format_binary(obj, base, 1, &str, NULL);
+ if (err == -1)
+ return NULL;
+ return str;
+}
+
+int
+_PyLong_FormatWriter(_PyUnicodeWriter *writer,
+ PyObject *obj,
+ int base, int alternate)
+{
+ if (base == 10)
+ return long_to_decimal_string_internal(obj, NULL, writer);
+ else
+ return long_format_binary(obj, base, alternate, NULL, writer);
}
/* Table of digit values for 8-bit string -> integer conversion.
@@ -4232,11 +4352,22 @@ static PyObject *
long__format__(PyObject *self, PyObject *args)
{
PyObject *format_spec;
+ _PyUnicodeWriter writer;
+ int ret;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
- return _PyLong_FormatAdvanced(self, format_spec, 0,
- PyUnicode_GET_LENGTH(format_spec));
+
+ _PyUnicodeWriter_Init(&writer, 0);
+ ret = _PyLong_FormatAdvancedWriter(
+ &writer,
+ self,
+ format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
+ if (ret == -1) {
+ _PyUnicodeWriter_Dealloc(&writer);
+ return NULL;
+ }
+ return _PyUnicodeWriter_Finish(&writer);
}
/* Return a pair (q, r) such that a = b * q + r, and
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index ab5bae7..f62813d 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -18,7 +18,7 @@
#define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
-#define STRINGLIB_NEW unicode_fromascii
+#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index 9c0b0cf..d71cf44 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -499,26 +499,26 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
int ok = 0;
PyObject *result = NULL;
PyObject *format_spec_object = NULL;
- PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
- Py_ssize_t len;
+ int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
+ int err;
/* If we know the type exactly, skip the lookup of __format__ and just
call the formatter directly. */
if (PyUnicode_CheckExact(fieldobj))
- formatter = _PyUnicode_FormatAdvanced;
+ formatter = _PyUnicode_FormatAdvancedWriter;
else if (PyLong_CheckExact(fieldobj))
- formatter =_PyLong_FormatAdvanced;
+ formatter = _PyLong_FormatAdvancedWriter;
else if (PyFloat_CheckExact(fieldobj))
- formatter = _PyFloat_FormatAdvanced;
-
- /* XXX: for 2.6, convert format_spec to the appropriate type
- (unicode, str) */
+ formatter = _PyFloat_FormatAdvancedWriter;
+ else if (PyComplex_CheckExact(fieldobj))
+ formatter = _PyComplex_FormatAdvancedWriter;
if (formatter) {
/* we know exactly which formatter will be called when __format__ is
looked up, so call it directly, instead. */
- result = formatter(fieldobj, format_spec->str,
- format_spec->start, format_spec->end);
+ err = formatter(writer, fieldobj, format_spec->str,
+ format_spec->start, format_spec->end);
+ return (err == 0);
}
else {
/* We need to create an object out of the pointers we have, because
@@ -536,17 +536,11 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write
}
if (result == NULL)
goto done;
- if (PyUnicode_READY(result) == -1)
- goto done;
- len = PyUnicode_GET_LENGTH(result);
- if (_PyUnicodeWriter_Prepare(writer,
- len, PyUnicode_MAX_CHAR_VALUE(result)) == -1)
+ if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
goto done;
- copy_characters(writer->buffer, writer->pos,
- result, 0, len);
- writer->pos += len;
ok = 1;
+
done:
Py_XDECREF(format_spec_object);
Py_XDECREF(result);
@@ -897,16 +891,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
if (err == -1)
return 0;
- copy_characters(writer->buffer, writer->pos,
- literal.str, literal.start, sublen);
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ literal.str, literal.start, sublen);
writer->pos += sublen;
}
- if (field_present)
+ if (field_present) {
+ if (iter.str.start == iter.str.end)
+ writer->flags.overallocate = 0;
if (!output_markup(&field_name, &format_spec,
format_spec_needs_expanding, conversion, writer,
args, kwargs, recursion_depth, auto_number))
return 0;
+ }
}
return result;
}
@@ -921,7 +918,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
_PyUnicodeWriter writer;
- Py_ssize_t initlen;
+ Py_ssize_t minlen;
/* check the recursion level */
if (recursion_depth <= 0) {
@@ -930,9 +927,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL;
}
- initlen = PyUnicode_GET_LENGTH(input->str) + 100;
- if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1)
- return NULL;
+ minlen = PyUnicode_GET_LENGTH(input->str) + 100;
+ _PyUnicodeWriter_Init(&writer, minlen);
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 8fbc203..00bfff2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -225,16 +225,10 @@ const unsigned char _Py_ascii_whitespace[] = {
/* forward */
static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
static PyObject* get_latin1_char(unsigned char ch);
-static void copy_characters(
- PyObject *to, Py_ssize_t to_start,
- PyObject *from, Py_ssize_t from_start,
- Py_ssize_t how_many);
static int unicode_modifiable(PyObject *unicode);
static PyObject *
-unicode_fromascii(const unsigned char *s, Py_ssize_t size);
-static PyObject *
_PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size);
static PyObject *
_PyUnicode_FromUCS2(const Py_UCS2 *s, Py_ssize_t size);
@@ -783,7 +777,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
return NULL;
copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode));
- copy_characters(copy, 0, unicode, 0, copy_length);
+ _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length);
return copy;
}
else {
@@ -1154,15 +1148,16 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
assert(0 <= from_start);
assert(0 <= to_start);
assert(PyUnicode_Check(from));
- assert(PyUnicode_Check(to));
assert(PyUnicode_IS_READY(from));
- assert(PyUnicode_IS_READY(to));
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
- assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
if (how_many == 0)
return 0;
+ assert(PyUnicode_Check(to));
+ assert(PyUnicode_IS_READY(to));
+ assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
+
from_kind = PyUnicode_KIND(from);
from_data = PyUnicode_DATA(from);
to_kind = PyUnicode_KIND(to);
@@ -1267,10 +1262,10 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
return 0;
}
-static void
-copy_characters(PyObject *to, Py_ssize_t to_start,
- PyObject *from, Py_ssize_t from_start,
- Py_ssize_t how_many)
+void
+_PyUnicode_FastCopyCharacters(
+ PyObject *to, Py_ssize_t to_start,
+ PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many)
{
(void)_copy_characters(to, to_start, from, from_start, how_many, 0);
}
@@ -1292,6 +1287,14 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start,
if (PyUnicode_READY(to) == -1)
return -1;
+ if (from_start < 0) {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
+ return -1;
+ }
+ if (to_start < 0) {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
+ return -1;
+ }
how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many);
if (to_start + how_many > PyUnicode_GET_LENGTH(to)) {
PyErr_Format(PyExc_SystemError,
@@ -1641,7 +1644,7 @@ unicode_widen(PyObject **p_unicode, Py_ssize_t length,
maxchar);
if (result == NULL)
return -1;
- PyUnicode_CopyCharacters(result, 0, *p_unicode, 0, length);
+ _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length);
Py_DECREF(*p_unicode);
*p_unicode = result;
return 0;
@@ -1841,9 +1844,10 @@ _PyUnicode_ClearStaticStrings()
/* Internal function, doesn't check maximum character */
-static PyObject*
-unicode_fromascii(const unsigned char* s, Py_ssize_t size)
+PyObject*
+_PyUnicode_FromASCII(const char *buffer, Py_ssize_t size)
{
+ const unsigned char *s = (const unsigned char *)buffer;
PyObject *unicode;
if (size == 1) {
#ifdef Py_DEBUG
@@ -2085,7 +2089,7 @@ unicode_adjust_maxchar(PyObject **p_unicode)
return;
}
copy = PyUnicode_New(len, max_char);
- copy_characters(copy, 0, unicode, 0, len);
+ _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
Py_DECREF(unicode);
*p_unicode = copy;
}
@@ -2753,7 +2757,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
(void) va_arg(vargs, char *);
size = PyUnicode_GET_LENGTH(*callresult);
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
- copy_characters(string, i, *callresult, 0, size);
+ _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
i += size;
/* We're done with the unicode()/repr() => forget it */
Py_DECREF(*callresult);
@@ -2767,7 +2771,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
Py_ssize_t size;
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
size = PyUnicode_GET_LENGTH(obj);
- copy_characters(string, i, obj, 0, size);
+ _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
i += size;
break;
}
@@ -2779,13 +2783,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
if (obj) {
size = PyUnicode_GET_LENGTH(obj);
assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string));
- copy_characters(string, i, obj, 0, size);
+ _PyUnicode_FastCopyCharacters(string, i, obj, 0, size);
i += size;
} else {
size = PyUnicode_GET_LENGTH(*callresult);
assert(PyUnicode_KIND(*callresult) <=
PyUnicode_KIND(string));
- copy_characters(string, i, *callresult, 0, size);
+ _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
i += size;
Py_DECREF(*callresult);
}
@@ -2800,7 +2804,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
/* unused, since we already have the result */
(void) va_arg(vargs, PyObject *);
assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string));
- copy_characters(string, i, *callresult, 0, size);
+ _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size);
i += size;
/* We're done with the unicode()/repr() => forget it */
Py_DECREF(*callresult);
@@ -4171,7 +4175,7 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler,
if (unicode_widen(output, *outpos,
PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0)
goto onError;
- copy_characters(*output, *outpos, repunicode, 0, replen);
+ _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen);
*outpos += replen;
}
else {
@@ -9216,12 +9220,14 @@ fixup(PyObject *self,
/* If the maxchar increased so that the kind changed, not all
characters are representable anymore and we need to fix the
string again. This only happens in very few cases. */
- copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self));
+ _PyUnicode_FastCopyCharacters(v, 0,
+ self, 0, PyUnicode_GET_LENGTH(self));
maxchar_old = fixfct(v);
assert(maxchar_old > 0 && maxchar_old <= maxchar_new);
}
else {
- copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self));
+ _PyUnicode_FastCopyCharacters(v, 0,
+ u, 0, PyUnicode_GET_LENGTH(self));
}
Py_DECREF(u);
assert(_PyUnicode_CheckConsistency(v, 1));
@@ -9603,7 +9609,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
res_data += kind * seplen;
}
else {
- copy_characters(res, res_offset, sep, 0, seplen);
+ _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen);
res_offset += seplen;
}
}
@@ -9616,7 +9622,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
res_data += kind * itemlen;
}
else {
- copy_characters(res, res_offset, item, 0, itemlen);
+ _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen);
res_offset += itemlen;
}
}
@@ -9663,13 +9669,25 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
} \
} while (0)
+void
+_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
+ Py_UCS4 fill_char)
+{
+ const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+ const void *data = PyUnicode_DATA(unicode);
+ assert(PyUnicode_IS_READY(unicode));
+ assert(unicode_modifiable(unicode));
+ assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
+ assert(start >= 0);
+ assert(start + length <= PyUnicode_GET_LENGTH(unicode));
+ FILL(kind, data, fill_char, start, length);
+}
+
Py_ssize_t
PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
Py_UCS4 fill_char)
{
Py_ssize_t maxlen;
- enum PyUnicode_Kind kind;
- void *data;
if (!PyUnicode_Check(unicode)) {
PyErr_BadInternalCall();
@@ -9680,6 +9698,10 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
if (unicode_check_modifiable(unicode))
return -1;
+ if (start < 0) {
+ PyErr_SetString(PyExc_IndexError, "string index out of range");
+ return -1;
+ }
if (fill_char > PyUnicode_MAX_CHAR_VALUE(unicode)) {
PyErr_SetString(PyExc_ValueError,
"fill character is bigger than "
@@ -9692,9 +9714,7 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
if (length <= 0)
return 0;
- kind = PyUnicode_KIND(unicode);
- data = PyUnicode_DATA(unicode);
- FILL(kind, data, fill_char, start, length);
+ _PyUnicode_FastFill(unicode, start, length, fill_char);
return length;
}
@@ -9734,7 +9754,7 @@ pad(PyObject *self,
FILL(kind, data, fill, 0, left);
if (right)
FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
- copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self));
+ _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
assert(_PyUnicode_CheckConsistency(u, 1));
return u;
}
@@ -10058,7 +10078,7 @@ replace(PyObject *self, PyObject *str1,
u = PyUnicode_New(slen, maxchar);
if (!u)
goto error;
- copy_characters(u, 0, self, 0, slen);
+ _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen);
rkind = PyUnicode_KIND(u);
PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2);
@@ -10626,8 +10646,8 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
w = PyUnicode_New(new_len, maxchar);
if (w == NULL)
goto onError;
- copy_characters(w, 0, u, 0, u_len);
- copy_characters(w, u_len, v, 0, v_len);
+ _PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len);
+ _PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len);
Py_DECREF(u);
Py_DECREF(v);
assert(_PyUnicode_CheckConsistency(w, 1));
@@ -10702,7 +10722,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
goto error;
}
/* copy 'right' into the newly allocated area of 'left' */
- copy_characters(*p_left, left_len, right, 0, right_len);
+ _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len);
}
else {
maxchar = PyUnicode_MAX_CHAR_VALUE(left);
@@ -10713,8 +10733,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
res = PyUnicode_New(new_len, maxchar);
if (res == NULL)
goto error;
- copy_characters(res, 0, left, 0, left_len);
- copy_characters(res, left_len, right, 0, right_len);
+ _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len);
+ _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len);
Py_DECREF(left);
*p_left = res;
}
@@ -11650,7 +11670,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
length = end - start;
if (PyUnicode_IS_ASCII(self)) {
data = PyUnicode_1BYTE_DATA(self);
- return unicode_fromascii(data + start, length);
+ return _PyUnicode_FromASCII((char*)(data + start), length);
}
else {
kind = PyUnicode_KIND(self);
@@ -12769,60 +12789,74 @@ unicode_endswith(PyObject *self,
return PyBool_FromLong(result);
}
-typedef struct {
- PyObject *buffer;
- void *data;
- enum PyUnicode_Kind kind;
- Py_UCS4 maxchar;
- Py_ssize_t pos;
-} _PyUnicodeWriter ;
-
Py_LOCAL_INLINE(void)
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
{
+ writer->size = PyUnicode_GET_LENGTH(writer->buffer);
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
writer->data = PyUnicode_DATA(writer->buffer);
writer->kind = PyUnicode_KIND(writer->buffer);
}
-Py_LOCAL(int)
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer,
- Py_ssize_t length, Py_UCS4 maxchar)
+void
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length)
{
- writer->pos = 0;
- writer->buffer = PyUnicode_New(length, maxchar);
- if (writer->buffer == NULL)
- return -1;
- _PyUnicodeWriter_Update(writer);
- return 0;
+ memset(writer, 0, sizeof(*writer));
+#ifdef Py_DEBUG
+ writer->kind = 5; /* invalid kind */
+#endif
+ writer->min_length = Py_MAX(min_length, 100);
+ writer->flags.overallocate = (min_length > 0);
}
-Py_LOCAL_INLINE(int)
-_PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer,
- Py_ssize_t length, Py_UCS4 maxchar)
+int
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+ Py_ssize_t length, Py_UCS4 maxchar)
{
Py_ssize_t newlen;
PyObject *newbuffer;
+ assert(length > 0);
+
if (length > PY_SSIZE_T_MAX - writer->pos) {
PyErr_NoMemory();
return -1;
}
newlen = writer->pos + length;
- if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) {
- /* overallocate 25% to limit the number of resize */
- if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
- newlen += newlen / 4;
+ if (writer->buffer == NULL) {
+ if (writer->flags.overallocate) {
+ /* overallocate 25% to limit the number of resize */
+ if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
+ newlen += newlen / 4;
+ if (newlen < writer->min_length)
+ newlen = writer->min_length;
+ }
+ writer->buffer = PyUnicode_New(newlen, maxchar);
+ if (writer->buffer == NULL)
+ return -1;
+ _PyUnicodeWriter_Update(writer);
+ return 0;
+ }
+
+ if (newlen > writer->size) {
+ if (writer->flags.overallocate) {
+ /* overallocate 25% to limit the number of resize */
+ if (newlen <= (PY_SSIZE_T_MAX - newlen / 4))
+ newlen += newlen / 4;
+ if (newlen < writer->min_length)
+ newlen = writer->min_length;
+ }
- if (maxchar > writer->maxchar) {
+ if (maxchar > writer->maxchar || writer->flags.readonly) {
/* resize + widen */
newbuffer = PyUnicode_New(newlen, maxchar);
if (newbuffer == NULL)
return -1;
- PyUnicode_CopyCharacters(newbuffer, 0,
- writer->buffer, 0, writer->pos);
+ _PyUnicode_FastCopyCharacters(newbuffer, 0,
+ writer->buffer, 0, writer->pos);
Py_DECREF(writer->buffer);
+ writer->flags.readonly = 0;
}
else {
newbuffer = resize_compact(writer->buffer, newlen);
@@ -12833,25 +12867,76 @@ _PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer,
_PyUnicodeWriter_Update(writer);
}
else if (maxchar > writer->maxchar) {
- if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0)
+ assert(!writer->flags.readonly);
+ newbuffer = PyUnicode_New(writer->size, maxchar);
+ if (newbuffer == NULL)
return -1;
+ _PyUnicode_FastCopyCharacters(newbuffer, 0,
+ writer->buffer, 0, writer->pos);
+ Py_DECREF(writer->buffer);
+ writer->buffer = newbuffer;
_PyUnicodeWriter_Update(writer);
}
return 0;
}
-Py_LOCAL(PyObject *)
+int
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
+{
+ Py_UCS4 maxchar;
+ Py_ssize_t len;
+
+ if (PyUnicode_READY(str) == -1)
+ return -1;
+ len = PyUnicode_GET_LENGTH(str);
+ if (len == 0)
+ return 0;
+ maxchar = PyUnicode_MAX_CHAR_VALUE(str);
+ if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
+ if (writer->buffer == NULL && !writer->flags.overallocate) {
+ Py_INCREF(str);
+ writer->buffer = str;
+ _PyUnicodeWriter_Update(writer);
+ writer->flags.readonly = 1;
+ writer->size = 0;
+ writer->pos += len;
+ return 0;
+ }
+ if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
+ return -1;
+ }
+ _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
+ str, 0, len);
+ writer->pos += len;
+ return 0;
+}
+
+PyObject *
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
{
- if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) {
- Py_DECREF(writer->buffer);
- return NULL;
+ if (writer->pos == 0) {
+ Py_XDECREF(writer->buffer);
+ Py_INCREF(unicode_empty);
+ return unicode_empty;
+ }
+ if (writer->flags.readonly) {
+ assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
+ return writer->buffer;
+ }
+ if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) {
+ PyObject *newbuffer;
+ newbuffer = resize_compact(writer->buffer, writer->pos);
+ if (newbuffer == NULL) {
+ Py_DECREF(writer->buffer);
+ return NULL;
+ }
+ writer->buffer = newbuffer;
}
assert(_PyUnicode_CheckConsistency(writer->buffer, 1));
return writer->buffer;
}
-Py_LOCAL(void)
+void
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
{
Py_CLEAR(writer->buffer);
@@ -12874,14 +12959,24 @@ The substitutions are identified by braces ('{' and '}').");
+/* str.__format__(format_spec): parse the single str argument, then format
+   `self` through a _PyUnicodeWriter.  Returns the formatted string, or
+   NULL on failure. */
static PyObject *
unicode__format__(PyObject* self, PyObject* args)
{
- PyObject *format_spec, *out;
+    PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int status;
if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
return NULL;
- out = _PyUnicode_FormatAdvanced(self, format_spec, 0,
- PyUnicode_GET_LENGTH(format_spec));
- return out;
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    _PyUnicodeWriter_Init(&writer, 0);
+    status = _PyUnicode_FormatAdvancedWriter(&writer,
+                                             self, format_spec, 0,
+                                             PyUnicode_GET_LENGTH(format_spec));
+    if (status != -1)
+        return _PyUnicodeWriter_Finish(&writer);
+
+    /* Formatting failed: drop whatever the writer accumulated. */
+    _PyUnicodeWriter_Dealloc(&writer);
+    return NULL;
}
PyDoc_STRVAR(p_format__doc__,
@@ -13111,16 +13206,17 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
-/* Returns a new reference to a PyUnicode object, or NULL on failure. */
-static PyObject *
-formatfloat(PyObject *v, int flags, int prec, int type)
+/* Format the number `v` as a float for the %-format code `type`.
+
+   v        object converted with PyFloat_AsDouble()
+   flags    formatting flags; only F_ALT is consulted here
+   prec     precision, a negative value selects the default of 6
+   type     format code passed on to PyOS_double_to_string()
+   p_output used when `writer` is NULL: receives a new string reference
+   writer   when non-NULL, the text is appended to it and p_output is
+            not touched
+
+   Return 0 on success, -1 on failure.  The temporary C string is
+   released on every exit path. */
+static int
+formatfloat(PyObject *v, int flags, int prec, int type,
+            PyObject **p_output, _PyUnicodeWriter *writer)
{
char *p;
- PyObject *result;
double x;
+    Py_ssize_t len;
+    int status = 0;
x = PyFloat_AsDouble(v);
if (x == -1.0 && PyErr_Occurred())
- return NULL;
+        return -1;
if (prec < 0)
prec = 6;
@@ -13128,10 +13224,20 @@ formatfloat(PyObject *v, int flags, int prec, int type)
p = PyOS_double_to_string(x, type, prec,
(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
if (p == NULL)
- return NULL;
- result = unicode_fromascii((unsigned char*)p, strlen(p));
+        return -1;
+    len = strlen(p);
+    if (writer) {
+        if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) {
+            /* Fall through to PyMem_Free(p) below instead of leaking p. */
+            status = -1;
+        }
+        else {
+            Py_ssize_t i;
+
+            /* The writer's buffer may use 1, 2 or 4 bytes per character,
+               so the ASCII bytes cannot be memcpy()'d into it: store them
+               one character at a time in the buffer's own kind. */
+            for (i = 0; i < len; i++) {
+                PyUnicode_WRITE(writer->kind, writer->data,
+                                writer->pos + i, (unsigned char)p[i]);
+            }
+            writer->pos += len;
+        }
+    }
+    else {
+        *p_output = _PyUnicode_FromASCII(p, len);
+        if (*p_output == NULL)
+            status = -1;
+    }
PyMem_Free(p);
- return result;
+    return status;
}
/* formatlong() emulates the format codes d, u, o, x and X, and
@@ -13267,7 +13373,7 @@ formatlong(PyObject *val, int flags, int prec, int type)
}
if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) {
PyObject *unicode;
- unicode = unicode_fromascii((unsigned char *)buf, len);
+ unicode = _PyUnicode_FromASCII(buf, len);
Py_DECREF(result);
result = unicode;
}
@@ -13336,8 +13442,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
fmtcnt = PyUnicode_GET_LENGTH(uformat);
fmtpos = 0;
- if (_PyUnicodeWriter_Init(&writer, fmtcnt + 100, 127) < 0)
- goto onError;
+ _PyUnicodeWriter_Init(&writer, fmtcnt + 100);
if (PyTuple_Check(args)) {
arglen = PyTuple_Size(args);
@@ -13368,8 +13473,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1)
goto onError;
- copy_characters(writer.buffer, writer.pos,
- uformat, nonfmtpos, sublen);
+ _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
+ uformat, nonfmtpos, sublen);
writer.pos += sublen;
}
else {
@@ -13530,6 +13635,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
"incomplete format");
goto onError;
}
+ if (fmtcnt == 0)
+ writer.flags.overallocate = 0;
if (c == '%') {
if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1)
@@ -13539,7 +13646,6 @@ PyUnicode_Format(PyObject *format, PyObject *args)
continue;
}
-
v = getnextarg(args, arglen, &argidx);
if (v == NULL)
goto onError;
@@ -13552,6 +13658,13 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 's':
case 'r':
case 'a':
+ if (PyLong_CheckExact(v) && width == -1 && prec == -1) {
+ /* Fast path */
+ if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
+ goto onError;
+ goto nextarg;
+ }
+
if (PyUnicode_CheckExact(v) && c == 's') {
temp = v;
Py_INCREF(temp);
@@ -13572,6 +13685,32 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 'o':
case 'x':
case 'X':
+ if (PyLong_CheckExact(v)
+ && width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ switch(c)
+ {
+ case 'd':
+ case 'i':
+ case 'u':
+ if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1)
+ goto onError;
+ goto nextarg;
+ case 'x':
+ if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1)
+ goto onError;
+ goto nextarg;
+ case 'o':
+ if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1)
+ goto onError;
+ goto nextarg;
+ default:
+ break;
+ }
+ }
+
isnumok = 0;
if (PyNumber_Check(v)) {
PyObject *iobj=NULL;
@@ -13611,10 +13750,20 @@ PyUnicode_Format(PyObject *format, PyObject *args)
case 'F':
case 'g':
case 'G':
+ if (width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ if (formatfloat(v, flags, prec, c, NULL, &writer) == -1)
+ goto onError;
+ goto nextarg;
+ }
+
sign = 1;
if (flags & F_ZERO)
fill = '0';
- temp = formatfloat(v, flags, prec, c);
+ if (formatfloat(v, flags, prec, c, &temp, NULL) == -1)
+ temp = NULL;
break;
case 'c':
@@ -13622,6 +13771,14 @@ PyUnicode_Format(PyObject *format, PyObject *args)
Py_UCS4 ch = formatchar(v);
if (ch == (Py_UCS4) -1)
goto onError;
+ if (width == -1 && prec == -1) {
+ /* Fast path */
+ if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1)
+ goto onError;
+ PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch);
+ writer.pos += 1;
+ goto nextarg;
+ }
temp = PyUnicode_FromOrdinal(ch);
break;
}
@@ -13638,6 +13795,16 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (temp == NULL)
goto onError;
assert (PyUnicode_Check(temp));
+
+ if (width == -1 && prec == -1
+ && !(flags & (F_SIGN | F_BLANK)))
+ {
+ /* Fast path */
+ if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1)
+ goto onError;
+ goto nextarg;
+ }
+
if (PyUnicode_READY(temp) == -1) {
Py_CLEAR(temp);
goto onError;
@@ -13676,15 +13843,15 @@ PyUnicode_Format(PyObject *format, PyObject *args)
if (!(flags & F_LJUST)) {
if (sign) {
if ((width-1) > len)
- bufmaxchar = Py_MAX(bufmaxchar, fill);
+ bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
}
else {
if (width > len)
- bufmaxchar = Py_MAX(bufmaxchar, fill);
+ bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill);
}
}
maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len);
- bufmaxchar = Py_MAX(bufmaxchar, maxchar);
+ bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar);
buflen = width;
if (sign && len == width)
@@ -13737,8 +13904,8 @@ PyUnicode_Format(PyObject *format, PyObject *args)
}
}
- copy_characters(writer.buffer, writer.pos,
- temp, pindex, len);
+ _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos,
+ temp, pindex, len);
writer.pos += len;
if (width > len) {
sublen = width - len;
@@ -13746,6 +13913,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
writer.pos += sublen;
}
+nextarg:
if (dict && (argidx < arglen) && c != '%') {
PyErr_SetString(PyExc_TypeError,
"not all arguments converted during string formatting");