diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/complexobject.c | 17 | ||||
-rw-r--r-- | Objects/floatobject.c | 27 | ||||
-rw-r--r-- | Objects/longobject.c | 307 | ||||
-rw-r--r-- | Objects/stringlib/asciilib.h | 2 | ||||
-rw-r--r-- | Objects/stringlib/unicode_format.h | 46 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 362 |
6 files changed, 540 insertions, 221 deletions
diff --git a/Objects/complexobject.c b/Objects/complexobject.c index b73dc4b..403c60c 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -699,11 +699,22 @@ static PyObject * complex__format__(PyObject* self, PyObject* args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - return _PyComplex_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + return NULL; + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyComplex_FormatAdvancedWriter( + &writer, + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } #if 0 diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 07d31b2..3c742c3 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -267,13 +267,15 @@ static PyObject * float_repr(PyFloatObject *v) { PyObject *result; - char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), - 'r', 0, - Py_DTSF_ADD_DOT_0, - NULL); + char *buf; + + buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), + 'r', 0, + Py_DTSF_ADD_DOT_0, + NULL); if (!buf) return PyErr_NoMemory(); - result = PyUnicode_FromString(buf); + result = _PyUnicode_FromASCII(buf, strlen(buf)); PyMem_Free(buf); return result; } @@ -1703,11 +1705,22 @@ static PyObject * float__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyFloat_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyFloat_FormatAdvancedWriter( + &writer, + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } PyDoc_STRVAR(float__format__doc, diff --git a/Objects/longobject.c b/Objects/longobject.c index 74c59c7..1369dac 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1550,20 +1550,22 @@ divrem1(PyLongObject *a, digit n, digit *prem) string. (Return value is non-shared so that callers can modify the returned value if necessary.) */ -static PyObject * -long_to_decimal_string(PyObject *aa) +static int +long_to_decimal_string_internal(PyObject *aa, + PyObject **p_output, + _PyUnicodeWriter *writer) { PyLongObject *scratch, *a; PyObject *str; Py_ssize_t size, strlen, size_a, i, j; digit *pout, *pin, rem, tenpow; - unsigned char *p; int negative; + enum PyUnicode_Kind kind; a = (PyLongObject *)aa; if (a == NULL || !PyLong_Check(a)) { PyErr_BadInternalCall(); - return NULL; + return -1; } size_a = ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; @@ -1580,13 +1582,13 @@ long_to_decimal_string(PyObject *aa) if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, "long is too large to format"); - return NULL; + return -1; } /* the expression size_a * PyLong_SHIFT is now safe from overflow */ size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); scratch = _PyLong_New(size); if (scratch == NULL) - return NULL; + return -1; /* convert array of base _PyLong_BASE digits in pin to an array of base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, @@ -1609,7 +1611,7 @@ long_to_decimal_string(PyObject *aa) /* check for keyboard interrupt */ SIGCHECK({ Py_DECREF(scratch); - return NULL; + return -1; }); } /* pout should have at least one digit, so that the case when a = 0 @@ -1625,65 +1627,113 @@ long_to_decimal_string(PyObject *aa) tenpow *= 10; strlen++; } - str = PyUnicode_New(strlen, '9'); - if (str == NULL) { - Py_DECREF(scratch); - return NULL; + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) + return -1; + kind = writer->kind; + str = NULL; } + else { + str = PyUnicode_New(strlen, '9'); + if (str == NULL) { + Py_DECREF(scratch); + return -1; + } + kind = PyUnicode_KIND(str); + } + +#define WRITE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \ + else \ + p = (TYPE*)PyUnicode_DATA(str) + strlen; \ + \ + *p = '\0'; \ + /* pout[0] through pout[size-2] contribute exactly \ + _PyLong_DECIMAL_SHIFT digits each */ \ + for (i=0; i < size - 1; i++) { \ + rem = pout[i]; \ + for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } \ + } \ + /* pout[size-1]: always produce at least one decimal digit */ \ + rem = pout[i]; \ + do { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } while (rem != 0); \ + \ + /* and sign */ \ + if (negative) \ + *--p = '-'; \ + \ + /* check we've counted correctly */ \ + if (writer) \ + assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ + else \ + assert(p == (TYPE*)PyUnicode_DATA(str)); \ + } while (0) /* fill the string right-to-left */ - assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND); - p = PyUnicode_1BYTE_DATA(str) + strlen; - *p = '\0'; - /* pout[0] through pout[size-2] contribute exactly - _PyLong_DECIMAL_SHIFT digits each */ - for (i=0; i < size - 1; i++) { - rem = pout[i]; - for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { - *--p = '0' + rem % 10; - rem /= 10; - } + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *p; + WRITE_DIGITS(Py_UCS1); } - /* pout[size-1]: always produce at least one decimal digit */ - rem = pout[i]; - do { - *--p = '0' + rem % 10; - rem /= 10; - } while (rem != 0); - - /* and sign */ - if (negative) - *--p = '-'; + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *p; + WRITE_DIGITS(Py_UCS2); + } + else { + assert (kind == PyUnicode_4BYTE_KIND); + Py_UCS4 *p; + WRITE_DIGITS(Py_UCS4); + } +#undef WRITE_DIGITS - /* check we've counted correctly */ - assert(p == PyUnicode_1BYTE_DATA(str)); - assert(_PyUnicode_CheckConsistency(str, 1)); Py_DECREF(scratch); - return (PyObject *)str; + if (writer) { + writer->pos += strlen; + } + else { + assert(_PyUnicode_CheckConsistency(str, 1)); + *p_output = (PyObject *)str; + } + return 0; +} + +static PyObject * +long_to_decimal_string(PyObject *aa) +{ + PyObject *v; + if (long_to_decimal_string_internal(aa, &v, NULL) == -1) + return NULL; + return v; } /* Convert a long int object to a string, using a given conversion base, - which should be one of 2, 8, 10 or 16. Return a string object. - If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */ + which should be one of 2, 8 or 16. Return a string object. + If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x' + if alternate is nonzero. */ -PyObject * -_PyLong_Format(PyObject *aa, int base) +static int +long_format_binary(PyObject *aa, int base, int alternate, + PyObject **p_output, _PyUnicodeWriter *writer) { register PyLongObject *a = (PyLongObject *)aa; PyObject *v; Py_ssize_t sz; Py_ssize_t size_a; - Py_UCS1 *p; + enum PyUnicode_Kind kind; int negative; int bits; - assert(base == 2 || base == 8 || base == 10 || base == 16); - if (base == 10) - return long_to_decimal_string((PyObject *)a); - + assert(base == 2 || base == 8 || base == 16); if (a == NULL || !PyLong_Check(a)) { PyErr_BadInternalCall(); - return NULL; + return -1; } size_a = ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; @@ -1706,7 +1756,7 @@ _PyLong_Format(PyObject *aa, int base) /* Compute exact length 'sz' of output string. */ if (size_a == 0) { - sz = 3; + sz = 1; } else { Py_ssize_t size_a_in_bits; @@ -1714,56 +1764,126 @@ _PyLong_Format(PyObject *aa, int base) if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, "int is too large to format"); - return NULL; + return -1; } size_a_in_bits = (size_a - 1) * PyLong_SHIFT + bits_in_digit(a->ob_digit[size_a - 1]); - /* Allow 2 characters for prefix and 1 for a '-' sign. */ - sz = 2 + negative + (size_a_in_bits + (bits - 1)) / bits; + /* Allow 1 character for a '-' sign. */ + sz = negative + (size_a_in_bits + (bits - 1)) / bits; + } + if (alternate) { + /* 2 characters for prefix */ + sz += 2; } - v = PyUnicode_New(sz, 'x'); - if (v == NULL) { - return NULL; + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, sz, 'x') == -1) + return -1; + kind = writer->kind; + v = NULL; + } + else { + v = PyUnicode_New(sz, 'x'); + if (v == NULL) + return -1; + kind = PyUnicode_KIND(v); + } + +#define WRITE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \ + else \ + p = (TYPE*)PyUnicode_DATA(v) + sz; \ + \ + if (size_a == 0) { \ + *--p = '0'; \ + } \ + else { \ + /* JRH: special case for power-of-2 bases */ \ + twodigits accum = 0; \ + int accumbits = 0; /* # of bits in accum */ \ + Py_ssize_t i; \ + for (i = 0; i < size_a; ++i) { \ + accum |= (twodigits)a->ob_digit[i] << accumbits; \ + accumbits += PyLong_SHIFT; \ + assert(accumbits >= bits); \ + do { \ + char cdigit; \ + cdigit = (char)(accum & (base - 1)); \ + cdigit += (cdigit < 10) ? '0' : 'a'-10; \ + *--p = cdigit; \ + accumbits -= bits; \ + accum >>= bits; \ + } while (i < size_a-1 ? accumbits >= bits : accum > 0); \ + } \ + } \ + \ + if (alternate) { \ + if (base == 16) \ + *--p = 'x'; \ + else if (base == 8) \ + *--p = 'o'; \ + else /* (base == 2) */ \ + *--p = 'b'; \ + *--p = '0'; \ + } \ + if (negative) \ + *--p = '-'; \ + if (writer) \ + assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ + else \ + assert(p == (TYPE*)PyUnicode_DATA(v)); \ + } while (0) + + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *p; + WRITE_DIGITS(Py_UCS1); + } + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *p; + WRITE_DIGITS(Py_UCS2); } - assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); + else { + assert (kind == PyUnicode_4BYTE_KIND); + Py_UCS4 *p; + WRITE_DIGITS(Py_UCS4); + } +#undef WRITE_DIGITS - p = PyUnicode_1BYTE_DATA(v) + sz; - if (size_a == 0) { - *--p = '0'; + if (writer) { + writer->pos += sz; } else { - /* JRH: special case for power-of-2 bases */ - twodigits accum = 0; - int accumbits = 0; /* # of bits in accum */ - Py_ssize_t i; - for (i = 0; i < size_a; ++i) { - accum |= (twodigits)a->ob_digit[i] << accumbits; - accumbits += PyLong_SHIFT; - assert(accumbits >= bits); - do { - char cdigit; - cdigit = (char)(accum & (base - 1)); - cdigit += (cdigit < 10) ? '0' : 'a'-10; - *--p = cdigit; - accumbits -= bits; - accum >>= bits; - } while (i < size_a-1 ? accumbits >= bits : accum > 0); - } + assert(_PyUnicode_CheckConsistency(v, 1)); + *p_output = v; } + return 0; +} - if (base == 16) - *--p = 'x'; - else if (base == 8) - *--p = 'o'; - else /* (base == 2) */ - *--p = 'b'; - *--p = '0'; - if (negative) - *--p = '-'; - assert(p == PyUnicode_1BYTE_DATA(v)); - assert(_PyUnicode_CheckConsistency(v, 1)); - return v; +PyObject * +_PyLong_Format(PyObject *obj, int base) +{ + PyObject *str; + int err; + if (base == 10) + err = long_to_decimal_string_internal(obj, &str, NULL); + else + err = long_format_binary(obj, base, 1, &str, NULL); + if (err == -1) + return NULL; + return str; +} + +int +_PyLong_FormatWriter(_PyUnicodeWriter *writer, + PyObject *obj, + int base, int alternate) +{ + if (base == 10) + return long_to_decimal_string_internal(obj, NULL, writer); + else + return long_format_binary(obj, base, alternate, NULL, writer); } /* Table of digit values for 8-bit string -> integer conversion. @@ -4232,11 +4352,22 @@ static PyObject * long__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyLong_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyLong_FormatAdvancedWriter( + &writer, + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } /* Return a pair (q, r) such that a = b * q + r, and diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h index ab5bae7..f62813d 100644 --- a/Objects/stringlib/asciilib.h +++ b/Objects/stringlib/asciilib.h @@ -18,7 +18,7 @@ #define STRINGLIB_TODECIMAL Py_UNICODE_TODECIMAL #define STRINGLIB_STR PyUnicode_1BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH -#define STRINGLIB_NEW unicode_fromascii +#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN)) #define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index 9c0b0cf..d71cf44 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -499,26 +499,26 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write int ok = 0; PyObject *result = NULL; PyObject *format_spec_object = NULL; - PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; - Py_ssize_t len; + int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; + int err; /* If we know the type exactly, skip the lookup of __format__ and just call the formatter directly. */ if (PyUnicode_CheckExact(fieldobj)) - formatter = _PyUnicode_FormatAdvanced; + formatter = _PyUnicode_FormatAdvancedWriter; else if (PyLong_CheckExact(fieldobj)) - formatter =_PyLong_FormatAdvanced; + formatter = _PyLong_FormatAdvancedWriter; else if (PyFloat_CheckExact(fieldobj)) - formatter = _PyFloat_FormatAdvanced; - - /* XXX: for 2.6, convert format_spec to the appropriate type - (unicode, str) */ + formatter = _PyFloat_FormatAdvancedWriter; + else if (PyComplex_CheckExact(fieldobj)) + formatter = _PyComplex_FormatAdvancedWriter; if (formatter) { /* we know exactly which formatter will be called when __format__ is looked up, so call it directly, instead. */ - result = formatter(fieldobj, format_spec->str, - format_spec->start, format_spec->end); + err = formatter(writer, fieldobj, format_spec->str, + format_spec->start, format_spec->end); + return (err == 0); } else { /* We need to create an object out of the pointers we have, because @@ -536,17 +536,11 @@ render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *write } if (result == NULL) goto done; - if (PyUnicode_READY(result) == -1) - goto done; - len = PyUnicode_GET_LENGTH(result); - if (_PyUnicodeWriter_Prepare(writer, - len, PyUnicode_MAX_CHAR_VALUE(result)) == -1) + if (_PyUnicodeWriter_WriteStr(writer, result) == -1) goto done; - copy_characters(writer->buffer, writer->pos, - result, 0, len); - writer->pos += len; ok = 1; + done: Py_XDECREF(format_spec_object); Py_XDECREF(result); @@ -897,16 +891,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar); if (err == -1) return 0; - copy_characters(writer->buffer, writer->pos, - literal.str, literal.start, sublen); + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + literal.str, literal.start, sublen); writer->pos += sublen; } - if (field_present) + if (field_present) { + if (iter.str.start == iter.str.end) + writer->flags.overallocate = 0; if (!output_markup(&field_name, &format_spec, format_spec_needs_expanding, conversion, writer, args, kwargs, recursion_depth, auto_number)) return 0; + } } return result; } @@ -921,7 +918,7 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { _PyUnicodeWriter writer; - Py_ssize_t initlen; + Py_ssize_t minlen; /* check the recursion level */ if (recursion_depth <= 0) { @@ -930,9 +927,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, return NULL; } - initlen = PyUnicode_GET_LENGTH(input->str) + 100; - if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1) - return NULL; + minlen = PyUnicode_GET_LENGTH(input->str) + 100; + _PyUnicodeWriter_Init(&writer, minlen); if (!do_markup(input, args, kwargs, &writer, recursion_depth, auto_number)) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8fbc203..00bfff2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -225,16 +225,10 @@ const unsigned char _Py_ascii_whitespace[] = { /* forward */ static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); static PyObject* get_latin1_char(unsigned char ch); -static void copy_characters( - PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many); static int unicode_modifiable(PyObject *unicode); static PyObject * -unicode_fromascii(const unsigned char *s, Py_ssize_t size); -static PyObject * _PyUnicode_FromUCS1(const unsigned char *s, Py_ssize_t size); static PyObject * _PyUnicode_FromUCS2(const Py_UCS2 *s, Py_ssize_t size); @@ -783,7 +777,7 @@ resize_copy(PyObject *unicode, Py_ssize_t length) return NULL; copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - copy_characters(copy, 0, unicode, 0, copy_length); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); return copy; } else { @@ -1154,15 +1148,16 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, assert(0 <= from_start); assert(0 <= to_start); assert(PyUnicode_Check(from)); - assert(PyUnicode_Check(to)); assert(PyUnicode_IS_READY(from)); - assert(PyUnicode_IS_READY(to)); assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); - assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); if (how_many == 0) return 0; + assert(PyUnicode_Check(to)); + assert(PyUnicode_IS_READY(to)); + assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); + from_kind = PyUnicode_KIND(from); from_data = PyUnicode_DATA(from); to_kind = PyUnicode_KIND(to); @@ -1267,10 +1262,10 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, return 0; } -static void -copy_characters(PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many) +void +_PyUnicode_FastCopyCharacters( + PyObject *to, Py_ssize_t to_start, + PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many) { (void)_copy_characters(to, to_start, from, from_start, how_many, 0); } @@ -1292,6 +1287,14 @@ PyUnicode_CopyCharacters(PyObject *to, Py_ssize_t to_start, if (PyUnicode_READY(to) == -1) return -1; + if (from_start < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } + if (to_start < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } how_many = Py_MIN(PyUnicode_GET_LENGTH(from), how_many); if (to_start + how_many > PyUnicode_GET_LENGTH(to)) { PyErr_Format(PyExc_SystemError, @@ -1641,7 +1644,7 @@ unicode_widen(PyObject **p_unicode, Py_ssize_t length, maxchar); if (result == NULL) return -1; - PyUnicode_CopyCharacters(result, 0, *p_unicode, 0, length); + _PyUnicode_FastCopyCharacters(result, 0, *p_unicode, 0, length); Py_DECREF(*p_unicode); *p_unicode = result; return 0; @@ -1841,9 +1844,10 @@ _PyUnicode_ClearStaticStrings() /* Internal function, doesn't check maximum character */ -static PyObject* -unicode_fromascii(const unsigned char* s, Py_ssize_t size) +PyObject* +_PyUnicode_FromASCII(const char *buffer, Py_ssize_t size) { + const unsigned char *s = (const unsigned char *)buffer; PyObject *unicode; if (size == 1) { #ifdef Py_DEBUG @@ -2085,7 +2089,7 @@ unicode_adjust_maxchar(PyObject **p_unicode) return; } copy = PyUnicode_New(len, max_char); - copy_characters(copy, 0, unicode, 0, len); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len); Py_DECREF(unicode); *p_unicode = copy; } @@ -2753,7 +2757,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) (void) va_arg(vargs, char *); size = PyUnicode_GET_LENGTH(*callresult); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; /* We're done with the unicode()/repr() => forget it */ Py_DECREF(*callresult); @@ -2767,7 +2771,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) Py_ssize_t size; assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); size = PyUnicode_GET_LENGTH(obj); - copy_characters(string, i, obj, 0, size); + _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); i += size; break; } @@ -2779,13 +2783,13 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) if (obj) { size = PyUnicode_GET_LENGTH(obj); assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - copy_characters(string, i, obj, 0, size); + _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); i += size; } else { size = PyUnicode_GET_LENGTH(*callresult); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; Py_DECREF(*callresult); } @@ -2800,7 +2804,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) /* unused, since we already have the result */ (void) va_arg(vargs, PyObject *); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; /* We're done with the unicode()/repr() => forget it */ Py_DECREF(*callresult); @@ -4171,7 +4175,7 @@ unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler, if (unicode_widen(output, *outpos, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) goto onError; - copy_characters(*output, *outpos, repunicode, 0, replen); + _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen); *outpos += replen; } else { @@ -9216,12 +9220,14 @@ fixup(PyObject *self, /* If the maxchar increased so that the kind changed, not all characters are representable anymore and we need to fix the string again. This only happens in very few cases. */ - copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self)); + _PyUnicode_FastCopyCharacters(v, 0, + self, 0, PyUnicode_GET_LENGTH(self)); maxchar_old = fixfct(v); assert(maxchar_old > 0 && maxchar_old <= maxchar_new); } else { - copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self)); + _PyUnicode_FastCopyCharacters(v, 0, + u, 0, PyUnicode_GET_LENGTH(self)); } Py_DECREF(u); assert(_PyUnicode_CheckConsistency(v, 1)); @@ -9603,7 +9609,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) res_data += kind * seplen; } else { - copy_characters(res, res_offset, sep, 0, seplen); + _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen); res_offset += seplen; } } @@ -9616,7 +9622,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) res_data += kind * itemlen; } else { - copy_characters(res, res_offset, item, 0, itemlen); + _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen); res_offset += itemlen; } } @@ -9663,13 +9669,25 @@ PyUnicode_Join(PyObject *separator, PyObject *seq) } \ } while (0) +void +_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, + Py_UCS4 fill_char) +{ + const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); + const void *data = PyUnicode_DATA(unicode); + assert(PyUnicode_IS_READY(unicode)); + assert(unicode_modifiable(unicode)); + assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode)); + assert(start >= 0); + assert(start + length <= PyUnicode_GET_LENGTH(unicode)); + FILL(kind, data, fill_char, start, length); +} + Py_ssize_t PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char) { Py_ssize_t maxlen; - enum PyUnicode_Kind kind; - void *data; if (!PyUnicode_Check(unicode)) { PyErr_BadInternalCall(); @@ -9680,6 +9698,10 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, if (unicode_check_modifiable(unicode)) return -1; + if (start < 0) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -1; + } if (fill_char > PyUnicode_MAX_CHAR_VALUE(unicode)) { PyErr_SetString(PyExc_ValueError, "fill character is bigger than " @@ -9692,9 +9714,7 @@ PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, if (length <= 0) return 0; - kind = PyUnicode_KIND(unicode); - data = PyUnicode_DATA(unicode); - FILL(kind, data, fill_char, start, length); + _PyUnicode_FastFill(unicode, start, length, fill_char); return length; } @@ -9734,7 +9754,7 @@ pad(PyObject *self, FILL(kind, data, fill, 0, left); if (right) FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); - copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self)); + _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self)); assert(_PyUnicode_CheckConsistency(u, 1)); return u; } @@ -10058,7 +10078,7 @@ replace(PyObject *self, PyObject *str1, u = PyUnicode_New(slen, maxchar); if (!u) goto error; - copy_characters(u, 0, self, 0, slen); + _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); rkind = PyUnicode_KIND(u); PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2); @@ -10626,8 +10646,8 @@ PyUnicode_Concat(PyObject *left, PyObject *right) w = PyUnicode_New(new_len, maxchar); if (w == NULL) goto onError; - copy_characters(w, 0, u, 0, u_len); - copy_characters(w, u_len, v, 0, v_len); + _PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len); + _PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len); Py_DECREF(u); Py_DECREF(v); assert(_PyUnicode_CheckConsistency(w, 1)); @@ -10702,7 +10722,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) goto error; } /* copy 'right' into the newly allocated area of 'left' */ - copy_characters(*p_left, left_len, right, 0, right_len); + _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len); } else { maxchar = PyUnicode_MAX_CHAR_VALUE(left); @@ -10713,8 +10733,8 @@ PyUnicode_Append(PyObject **p_left, PyObject *right) res = PyUnicode_New(new_len, maxchar); if (res == NULL) goto error; - copy_characters(res, 0, left, 0, left_len); - copy_characters(res, left_len, right, 0, right_len); + _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len); + _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); Py_DECREF(left); *p_left = res; } @@ -11650,7 +11670,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end) length = end - start; if (PyUnicode_IS_ASCII(self)) { data = PyUnicode_1BYTE_DATA(self); - return unicode_fromascii(data + start, length); + return _PyUnicode_FromASCII((char*)(data + start), length); } else { kind = PyUnicode_KIND(self); @@ -12769,60 +12789,74 @@ unicode_endswith(PyObject *self, return PyBool_FromLong(result); } -typedef struct { - PyObject *buffer; - void *data; - enum PyUnicode_Kind kind; - Py_UCS4 maxchar; - Py_ssize_t pos; -} _PyUnicodeWriter ; - Py_LOCAL_INLINE(void) _PyUnicodeWriter_Update(_PyUnicodeWriter *writer) { + writer->size = PyUnicode_GET_LENGTH(writer->buffer); writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); writer->data = PyUnicode_DATA(writer->buffer); writer->kind = PyUnicode_KIND(writer->buffer); } -Py_LOCAL(int) -_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar) +void +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length) { - writer->pos = 0; - writer->buffer = PyUnicode_New(length, maxchar); - if (writer->buffer == NULL) - return -1; - _PyUnicodeWriter_Update(writer); - return 0; + memset(writer, 0, sizeof(*writer)); +#ifdef Py_DEBUG + writer->kind = 5; /* invalid kind */ +#endif + writer->min_length = Py_MAX(min_length, 100); + writer->flags.overallocate = (min_length > 0); } -Py_LOCAL_INLINE(int) -_PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar) +int +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) { Py_ssize_t newlen; PyObject *newbuffer; + assert(length > 0); + if (length > PY_SSIZE_T_MAX - writer->pos) { PyErr_NoMemory(); return -1; } newlen = writer->pos + length; - if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { - /* overallocate 25% to limit the number of resize */ - if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) - newlen += newlen / 4; + if (writer->buffer == NULL) { + if (writer->flags.overallocate) { + /* overallocate 25% to limit the number of resize */ + if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) + newlen += newlen / 4; + if (newlen < writer->min_length) + newlen = writer->min_length; + } + writer->buffer = PyUnicode_New(newlen, maxchar); + if (writer->buffer == NULL) + return -1; + _PyUnicodeWriter_Update(writer); + return 0; + } + + if (newlen > writer->size) { + if (writer->flags.overallocate) { + /* overallocate 25% to limit the number of resize */ + if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) + newlen += newlen / 4; + if (newlen < writer->min_length) + newlen = writer->min_length; + } - if (maxchar > writer->maxchar) { + if (maxchar > writer->maxchar || writer->flags.readonly) { /* resize + widen */ newbuffer = PyUnicode_New(newlen, maxchar); if (newbuffer == NULL) return -1; - PyUnicode_CopyCharacters(newbuffer, 0, - writer->buffer, 0, writer->pos); + _PyUnicode_FastCopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); Py_DECREF(writer->buffer); + writer->flags.readonly = 0; } else { newbuffer = resize_compact(writer->buffer, newlen); @@ -12833,25 +12867,76 @@ _PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer, _PyUnicodeWriter_Update(writer); } else if (maxchar > writer->maxchar) { - if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0) + assert(!writer->flags.readonly); + newbuffer = PyUnicode_New(writer->size, maxchar); + if (newbuffer == NULL) return -1; + _PyUnicode_FastCopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); + Py_DECREF(writer->buffer); + writer->buffer = newbuffer; _PyUnicodeWriter_Update(writer); } return 0; } -Py_LOCAL(PyObject *) +int +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) +{ + Py_UCS4 maxchar; + Py_ssize_t len; + + if (PyUnicode_READY(str) == -1) + return -1; + len = PyUnicode_GET_LENGTH(str); + if (len == 0) + return 0; + maxchar = PyUnicode_MAX_CHAR_VALUE(str); + if (maxchar > writer->maxchar || len > writer->size - writer->pos) { + if (writer->buffer == NULL && !writer->flags.overallocate) { + Py_INCREF(str); + writer->buffer = str; + _PyUnicodeWriter_Update(writer); + writer->flags.readonly = 1; + writer->size = 0; + writer->pos += len; + return 0; + } + if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1) + return -1; + } + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, 0, len); + writer->pos += len; + return 0; +} + +PyObject * _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) { - if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { - Py_DECREF(writer->buffer); - return NULL; + if (writer->pos == 0) { + Py_XDECREF(writer->buffer); + Py_INCREF(unicode_empty); + return unicode_empty; + } + if (writer->flags.readonly) { + assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos); + return writer->buffer; + } + if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) { + PyObject *newbuffer; + newbuffer = resize_compact(writer->buffer, writer->pos); + if (newbuffer == NULL) { + Py_DECREF(writer->buffer); + return NULL; + } + writer->buffer = newbuffer; } assert(_PyUnicode_CheckConsistency(writer->buffer, 1)); return writer->buffer; } -Py_LOCAL(void) +void _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer) { Py_CLEAR(writer->buffer); @@ -12874,14 +12959,24 @@ The substitutions are identified by braces ('{' and '}')."); static PyObject * unicode__format__(PyObject* self, PyObject* args) { - PyObject *format_spec, *out; + PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - out = _PyUnicode_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); - return out; + if (PyUnicode_READY(self) == -1) + return NULL; + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyUnicode_FormatAdvancedWriter(&writer, + self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec)); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } PyDoc_STRVAR(p_format__doc__, @@ -13111,16 +13206,17 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) /* Returns a new reference to a PyUnicode object, or NULL on failure. */ -static PyObject * -formatfloat(PyObject *v, int flags, int prec, int type) +static int +formatfloat(PyObject *v, int flags, int prec, int type, + PyObject **p_output, _PyUnicodeWriter *writer) { char *p; - PyObject *result; double x; + Py_ssize_t len; x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) - return NULL; + return -1; if (prec < 0) prec = 6; @@ -13128,10 +13224,20 @@ formatfloat(PyObject *v, int flags, int prec, int type) p = PyOS_double_to_string(x, type, prec, (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); if (p == NULL) - return NULL; - result = unicode_fromascii((unsigned char*)p, strlen(p)); + return -1; + len = strlen(p); + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + return -1; + memcpy(writer->data + writer->pos * writer->kind, + p, + len); + writer->pos += len; + } + else + *p_output = _PyUnicode_FromASCII(p, len); PyMem_Free(p); - return result; + return 0; } /* formatlong() emulates the format codes d, u, o, x and X, and @@ -13267,7 +13373,7 @@ formatlong(PyObject *val, int flags, int prec, int type) } if (!PyUnicode_Check(result) || len != PyUnicode_GET_LENGTH(result)) { PyObject *unicode; - unicode = unicode_fromascii((unsigned char *)buf, len); + unicode = _PyUnicode_FromASCII(buf, len); Py_DECREF(result); result = unicode; } @@ -13336,8 +13442,7 @@ PyUnicode_Format(PyObject *format, PyObject *args) fmtcnt = PyUnicode_GET_LENGTH(uformat); fmtpos = 0; - if (_PyUnicodeWriter_Init(&writer, fmtcnt + 100, 127) < 0) - goto onError; + _PyUnicodeWriter_Init(&writer, fmtcnt + 100); if (PyTuple_Check(args)) { arglen = PyTuple_Size(args); @@ -13368,8 +13473,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1) goto onError; - copy_characters(writer.buffer, writer.pos, - uformat, nonfmtpos, sublen); + _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, + uformat, nonfmtpos, sublen); writer.pos += sublen; } else { @@ -13530,6 +13635,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) "incomplete format"); goto onError; } + if (fmtcnt == 0) + writer.flags.overallocate = 0; if (c == '%') { if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1) @@ -13539,7 +13646,6 @@ PyUnicode_Format(PyObject *format, PyObject *args) continue; } - v = getnextarg(args, arglen, &argidx); if (v == NULL) goto onError; @@ -13552,6 +13658,13 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 's': case 'r': case 'a': + if (PyLong_CheckExact(v) && width == -1 && prec == -1) { + /* Fast path */ + if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) + goto onError; + goto nextarg; + } + if (PyUnicode_CheckExact(v) && c == 's') { temp = v; Py_INCREF(temp); @@ -13572,6 +13685,32 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 'o': case 'x': case 'X': + if (PyLong_CheckExact(v) + && width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + switch(c) + { + case 'd': + case 'i': + case 'u': + if (_PyLong_FormatWriter(&writer, v, 10, flags & F_ALT) == -1) + goto onError; + goto nextarg; + case 'x': + if (_PyLong_FormatWriter(&writer, v, 16, flags & F_ALT) == -1) + goto onError; + goto nextarg; + case 'o': + if (_PyLong_FormatWriter(&writer, v, 8, flags & F_ALT) == -1) + goto onError; + goto nextarg; + default: + break; + } + } + isnumok = 0; if (PyNumber_Check(v)) { PyObject *iobj=NULL; @@ -13611,10 +13750,20 @@ PyUnicode_Format(PyObject *format, PyObject *args) case 'F': case 'g': case 'G': + if (width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (formatfloat(v, flags, prec, c, NULL, &writer) == -1) + goto onError; + goto nextarg; + } + sign = 1; if (flags & F_ZERO) fill = '0'; - temp = formatfloat(v, flags, prec, c); + if (formatfloat(v, flags, prec, c, &temp, NULL) == -1) + temp = NULL; break; case 'c': @@ -13622,6 +13771,14 @@ PyUnicode_Format(PyObject *format, PyObject *args) Py_UCS4 ch = formatchar(v); if (ch == (Py_UCS4) -1) goto onError; + if (width == -1 && prec == -1) { + /* Fast path */ + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos += 1; + goto nextarg; + } temp = PyUnicode_FromOrdinal(ch); break; } @@ -13638,6 +13795,16 @@ PyUnicode_Format(PyObject *format, PyObject *args) if (temp == NULL) goto onError; assert (PyUnicode_Check(temp)); + + if (width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1) + goto onError; + goto nextarg; + } + if (PyUnicode_READY(temp) == -1) { Py_CLEAR(temp); goto onError; @@ -13676,15 +13843,15 @@ PyUnicode_Format(PyObject *format, PyObject *args) if (!(flags & F_LJUST)) { if (sign) { if ((width-1) > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); } else { if (width > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); } } maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); - bufmaxchar = Py_MAX(bufmaxchar, maxchar); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar); buflen = width; if (sign && len == width) @@ -13737,8 +13904,8 @@ PyUnicode_Format(PyObject *format, PyObject *args) } } - copy_characters(writer.buffer, writer.pos, - temp, pindex, len); + _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, + temp, pindex, len); writer.pos += len; if (width > len) { sublen = width - len; @@ -13746,6 +13913,7 @@ PyUnicode_Format(PyObject *format, PyObject *args) writer.pos += sublen; } +nextarg: if (dict && (argidx < arglen) && c != '%') { PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting"); |