diff options
Diffstat (limited to 'Objects/stringlib')
-rw-r--r-- | Objects/stringlib/asciilib.h | 1 | ||||
-rw-r--r-- | Objects/stringlib/codecs.h | 12 | ||||
-rw-r--r-- | Objects/stringlib/eq.h | 4 | ||||
-rw-r--r-- | Objects/stringlib/fastsearch.h | 8 | ||||
-rw-r--r-- | Objects/stringlib/find_max_char.h | 4 | ||||
-rw-r--r-- | Objects/stringlib/join.h | 133 | ||||
-rw-r--r-- | Objects/stringlib/partition.h | 10 | ||||
-rw-r--r-- | Objects/stringlib/replace.h | 53 | ||||
-rw-r--r-- | Objects/stringlib/split.h | 4 | ||||
-rw-r--r-- | Objects/stringlib/stringdefs.h | 1 | ||||
-rw-r--r-- | Objects/stringlib/transmogrify.h | 8 | ||||
-rw-r--r-- | Objects/stringlib/ucs1lib.h | 1 | ||||
-rw-r--r-- | Objects/stringlib/ucs2lib.h | 1 | ||||
-rw-r--r-- | Objects/stringlib/ucs4lib.h | 1 | ||||
-rw-r--r-- | Objects/stringlib/undef.h | 1 | ||||
-rw-r--r-- | Objects/stringlib/unicode_format.h | 139 | ||||
-rw-r--r-- | Objects/stringlib/unicodedefs.h | 6 |
17 files changed, 280 insertions, 107 deletions
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h index f62813d..d0fc18d 100644 --- a/Objects/stringlib/asciilib.h +++ b/Objects/stringlib/asciilib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_1BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN)) -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h index f353367..57319c6 100644 --- a/Objects/stringlib/codecs.h +++ b/Objects/stringlib/codecs.h @@ -38,8 +38,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, */ if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) { /* Help register allocation */ - register const char *_s = s; - register STRINGLIB_CHAR *_p = p; + const char *_s = s; + STRINGLIB_CHAR *_p = p; while (_s < aligned_end) { /* Read a whole long at a time (either 4 or 8 bytes), and do a fast unrolled copy if it only contains ASCII @@ -47,7 +47,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end, unsigned long value = *(unsigned long *) _s; if (value & ASCII_CHAR_MASK) break; -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN _p[0] = (STRINGLIB_CHAR)(value & 0xFFu); _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu); _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu); @@ -486,7 +486,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, const unsigned char *q = *inptr; STRINGLIB_CHAR *p = dest + *outpos; /* Offsets from q for retrieving byte pairs in the right order. */ -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN int ihi = !!native_ordering, ilo = !native_ordering; #else int ihi = !native_ordering, ilo = !!native_ordering; @@ -499,7 +499,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, reads are more expensive, better to defer to another iteration. */ if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) { /* Fast path for runs of in-range non-surrogate chars. */ - register const unsigned char *_q = q; + const unsigned char *_q = q; while (_q < aligned_end) { unsigned long block = * (unsigned long *) _q; if (native_ordering) { @@ -517,7 +517,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e, block = SWAB(block); #endif } -#ifdef BYTEORDER_IS_LITTLE_ENDIAN +#if PY_LITTLE_ENDIAN # if SIZEOF_LONG == 4 p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu); p[1] = (STRINGLIB_CHAR)(block >> 16); diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h index 3e5f510..f8fd384 100644 --- a/Objects/stringlib/eq.h +++ b/Objects/stringlib/eq.h @@ -6,8 +6,8 @@ Py_LOCAL_INLINE(int) unicode_eq(PyObject *aa, PyObject *bb) { - register PyUnicodeObject *a = (PyUnicodeObject *)aa; - register PyUnicodeObject *b = (PyUnicodeObject *)bb; + PyUnicodeObject *a = (PyUnicodeObject *)aa; + PyUnicodeObject *b = (PyUnicodeObject *)bb; if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) { assert(0 && "unicode_eq ready fail"); diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 55ac77d..cd7cac4 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -142,6 +142,8 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, mask = 0; if (mode != FAST_RSEARCH) { + const STRINGLIB_CHAR *ss = s + m - 1; + const STRINGLIB_CHAR *pp = p + m - 1; /* create compressed boyer-moore delta 1 table */ @@ -156,7 +158,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, for (i = 0; i <= w; i++) { /* note: using mlast in the skip path slows things down on x86 */ - if (s[i+m-1] == p[m-1]) { + if (ss[i] == pp[0]) { /* candidate match */ for (j = 0; j < mlast; j++) if (s[i+j] != p[j]) @@ -172,13 +174,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, continue; } /* miss: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, s[i+m])) + if (!STRINGLIB_BLOOM(mask, ss[i+1])) i = i + m; else i = i + skip; } else { /* skip: check if next character is part of pattern */ - if (!STRINGLIB_BLOOM(mask, s[i+m])) + if (!STRINGLIB_BLOOM(mask, ss[i+1])) i = i + m; } } diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h index 06559c8..eb3fe88 100644 --- a/Objects/stringlib/find_max_char.h +++ b/Objects/stringlib/find_max_char.h @@ -24,7 +24,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) while (p < end) { if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) { /* Help register allocation */ - register const unsigned char *_p = p; + const unsigned char *_p = p; while (_p < aligned_end) { unsigned long value = *(unsigned long *) _p; if (value & UCS1_ASCII_CHAR_MASK) @@ -66,7 +66,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) #else #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) #endif - register Py_UCS4 mask; + Py_UCS4 mask; Py_ssize_t n = end - begin; const STRINGLIB_CHAR *p = begin; const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h new file mode 100644 index 0000000..5568b31 --- /dev/null +++ b/Objects/stringlib/join.h @@ -0,0 +1,133 @@ +/* stringlib: bytes joining implementation */ + +#if STRINGLIB_SIZEOF_CHAR != 1 +#error join.h only compatible with byte-wise strings +#endif + +Py_LOCAL_INLINE(PyObject *) +STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable) +{ + char *sepstr = STRINGLIB_STR(sep); + const Py_ssize_t seplen = STRINGLIB_LEN(sep); + PyObject *res = NULL; + char *p; + Py_ssize_t seqlen = 0; + Py_ssize_t sz = 0; + Py_ssize_t i, nbufs; + PyObject *seq, *item; + Py_buffer *buffers = NULL; +#define NB_STATIC_BUFFERS 10 + Py_buffer static_buffers[NB_STATIC_BUFFERS]; + + seq = PySequence_Fast(iterable, "can only join an iterable"); + if (seq == NULL) { + return NULL; + } + + seqlen = PySequence_Fast_GET_SIZE(seq); + if (seqlen == 0) { + Py_DECREF(seq); + return STRINGLIB_NEW(NULL, 0); + } +#ifndef STRINGLIB_MUTABLE + if (seqlen == 1) { + item = PySequence_Fast_GET_ITEM(seq, 0); + if (STRINGLIB_CHECK_EXACT(item)) { + Py_INCREF(item); + Py_DECREF(seq); + return item; + } + } +#endif + if (seqlen > NB_STATIC_BUFFERS) { + buffers = PyMem_NEW(Py_buffer, seqlen); + if (buffers == NULL) { + Py_DECREF(seq); + PyErr_NoMemory(); + return NULL; + } + } + else { + buffers = static_buffers; + } + + /* Here is the general case. Do a pre-pass to figure out the total + * amount of space we'll need (sz), and see whether all arguments are + * buffer-compatible. + */ + for (i = 0, nbufs = 0; i < seqlen; i++) { + Py_ssize_t itemlen; + item = PySequence_Fast_GET_ITEM(seq, i); + if (_getbuffer(item, &buffers[i]) < 0) { + PyErr_Format(PyExc_TypeError, + "sequence item %zd: expected bytes, bytearray, " + "or an object with the buffer interface, %.80s found", + i, Py_TYPE(item)->tp_name); + goto error; + } + nbufs = i + 1; /* for error cleanup */ + itemlen = buffers[i].len; + if (itemlen > PY_SSIZE_T_MAX - sz) { + PyErr_SetString(PyExc_OverflowError, + "join() result is too long"); + goto error; + } + sz += itemlen; + if (i != 0) { + if (seplen > PY_SSIZE_T_MAX - sz) { + PyErr_SetString(PyExc_OverflowError, + "join() result is too long"); + goto error; + } + sz += seplen; + } + if (seqlen != PySequence_Fast_GET_SIZE(seq)) { + PyErr_SetString(PyExc_RuntimeError, + "sequence changed size during iteration"); + goto error; + } + } + + /* Allocate result space. */ + res = STRINGLIB_NEW(NULL, sz); + if (res == NULL) + goto error; + + /* Catenate everything. */ + p = STRINGLIB_STR(res); + if (!seplen) { + /* fast path */ + for (i = 0; i < nbufs; i++) { + Py_ssize_t n = buffers[i].len; + char *q = buffers[i].buf; + Py_MEMCPY(p, q, n); + p += n; + } + goto done; + } + for (i = 0; i < nbufs; i++) { + Py_ssize_t n; + char *q; + if (i) { + Py_MEMCPY(p, sepstr, seplen); + p += seplen; + } + n = buffers[i].len; + q = buffers[i].buf; + Py_MEMCPY(p, q, n); + p += n; + } + goto done; + +error: + res = NULL; +done: + Py_DECREF(seq); + for (i = 0; i < nbufs; i++) + PyBuffer_Release(&buffers[i]); + if (buffers != static_buffers) + PyMem_FREE(buffers); + return res; +} + +#undef NB_STATIC_BUFFERS diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h index 40cb512..ed32a6f 100644 --- a/Objects/stringlib/partition.h +++ b/Objects/stringlib/partition.h @@ -29,6 +29,11 @@ STRINGLIB(partition)(PyObject* str_obj, PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len)); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } #else Py_INCREF(str_obj); PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj); @@ -79,6 +84,11 @@ STRINGLIB(rpartition)(PyObject* str_obj, PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0)); PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0)); PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len)); + + if (PyErr_Occurred()) { + Py_DECREF(out); + return NULL; + } #else Py_INCREF(STRINGLIB_EMPTY); PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY); diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h new file mode 100644 index 0000000..ef318ed --- /dev/null +++ b/Objects/stringlib/replace.h @@ -0,0 +1,53 @@ +/* stringlib: replace implementation */ + +#ifndef STRINGLIB_FASTSEARCH_H +#error must include "stringlib/fastsearch.h" before including this module +#endif + +Py_LOCAL_INLINE(void) +STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end, + Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount) +{ + *s = u2; + while (--maxcount && ++s != end) { + /* Find the next character to be replaced. + + If it occurs often, it is faster to scan for it using an inline + loop. If it occurs seldom, it is faster to scan for it using a + function call; the overhead of the function call is amortized + across the many characters that call covers. We start with an + inline loop and use a heuristic to determine whether to fall back + to a function call. */ + if (*s != u1) { + int attempts = 10; + /* search u1 in a dummy loop */ + while (1) { + if (++s == end) + return; + if (*s == u1) + break; + if (!--attempts) { + /* if u1 was not found for attempts iterations, + use FASTSEARCH() or memchr() */ +#if STRINGLIB_SIZEOF_CHAR == 1 + s++; + s = memchr(s, u1, end - s); + if (s == NULL) + return; +#else + Py_ssize_t i; + STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1; + s++; + i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH); + if (i < 0) + return; + s += i; +#endif + /* restart the dummy loop */ + break; + } + } + } + *s = u2; + } +} diff --git a/Objects/stringlib/split.h b/Objects/stringlib/split.h index 947dd28..31f77a7 100644 --- a/Objects/stringlib/split.h +++ b/Objects/stringlib/split.h @@ -345,8 +345,8 @@ STRINGLIB(splitlines)(PyObject* str_obj, and the appends only done when the prealloc buffer is full. That's too much work for little gain.*/ - register Py_ssize_t i; - register Py_ssize_t j; + Py_ssize_t i; + Py_ssize_t j; PyObject *list = PyList_New(0); PyObject *sub; diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h index 7bb91a7..ce27f3e 100644 --- a/Objects/stringlib/stringdefs.h +++ b/Objects/stringlib/stringdefs.h @@ -21,7 +21,6 @@ #define STRINGLIB_STR PyBytes_AS_STRING #define STRINGLIB_LEN PyBytes_GET_SIZE #define STRINGLIB_NEW PyBytes_FromStringAndSize -#define STRINGLIB_RESIZE _PyBytes_Resize #define STRINGLIB_CHECK PyBytes_Check #define STRINGLIB_CHECK_EXACT PyBytes_CheckExact #define STRINGLIB_TOSTR PyObject_Str diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h index 90fa129..dd00976 100644 --- a/Objects/stringlib/transmogrify.h +++ b/Objects/stringlib/transmogrify.h @@ -5,21 +5,23 @@ shared code in bytes_methods.c to cut down on duplicate code bloat. */ PyDoc_STRVAR(expandtabs__doc__, -"B.expandtabs([tabsize]) -> copy of B\n\ +"B.expandtabs(tabsize=8) -> copy of B\n\ \n\ Return a copy of B where all tab characters are expanded using spaces.\n\ If tabsize is not given, a tab size of 8 characters is assumed."); static PyObject* -stringlib_expandtabs(PyObject *self, PyObject *args) +stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds) { const char *e, *p; char *q; size_t i, j; PyObject *u; + static char *kwlist[] = {"tabsize", 0}; int tabsize = 8; - if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs", + kwlist, &tabsize)) return NULL; /* First pass: determine size of output string */ diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h index e8c6fcb..ce1eb57 100644 --- a/Objects/stringlib/ucs1lib.h +++ b/Objects/stringlib/ucs1lib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_1BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW _PyUnicode_FromUCS1 -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h index 45e5729..f900cb6 100644 --- a/Objects/stringlib/ucs2lib.h +++ b/Objects/stringlib/ucs2lib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_2BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW _PyUnicode_FromUCS2 -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h index 647a27e..86a480f 100644 --- a/Objects/stringlib/ucs4lib.h +++ b/Objects/stringlib/ucs4lib.h @@ -19,7 +19,6 @@ #define STRINGLIB_STR PyUnicode_4BYTE_DATA #define STRINGLIB_LEN PyUnicode_GET_LENGTH #define STRINGLIB_NEW _PyUnicode_FromUCS4 -#define STRINGLIB_RESIZE not_supported #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h index 03117ec..f9d3f1d 100644 --- a/Objects/stringlib/undef.h +++ b/Objects/stringlib/undef.h @@ -6,7 +6,6 @@ #undef STRINGLIB_STR #undef STRINGLIB_LEN #undef STRINGLIB_NEW -#undef STRINGLIB_RESIZE #undef _Py_InsertThousandsGrouping #undef STRINGLIB_IS_UNICODE diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h index c1c2cf3..aec221a 100644 --- a/Objects/stringlib/unicode_format.h +++ b/Objects/stringlib/unicode_format.h @@ -543,7 +543,7 @@ done: static int parse_field(SubString *str, SubString *field_name, SubString *format_spec, - Py_UCS4 *conversion) + int *format_spec_needs_expanding, Py_UCS4 *conversion) { /* Note this function works if the field name is zero length, which is good. Zero length field names are handled later, in @@ -561,6 +561,15 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec, field_name->start = str->start; while (str->start < str->end) { switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { + case '{': + PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name"); + return 0; + case '[': + for (; str->start < str->end; str->start++) + if (PyUnicode_READ_CHAR(str->str, str->start) == ']') + break; + continue; + case '}': case ':': case '!': break; @@ -570,41 +579,62 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec, break; } + field_name->end = str->start - 1; if (c == '!' || c == ':') { + Py_ssize_t count; /* we have a format specifier and/or a conversion */ /* don't include the last character */ - field_name->end = str->start-1; - - /* the format specifier is the rest of the string */ - format_spec->str = str->str; - format_spec->start = str->start; - format_spec->end = str->end; /* see if there's a conversion specifier */ if (c == '!') { /* there must be another character present */ - if (format_spec->start >= format_spec->end) { + if (str->start >= str->end) { PyErr_SetString(PyExc_ValueError, - "end of format while looking for conversion " + "end of string while looking for conversion " "specifier"); return 0; } - *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++); + *conversion = PyUnicode_READ_CHAR(str->str, str->start++); - /* if there is another character, it must be a colon */ - if (format_spec->start < format_spec->end) { - c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++); + if (str->start < str->end) { + c = PyUnicode_READ_CHAR(str->str, str->start++); + if (c == '}') + return 1; if (c != ':') { PyErr_SetString(PyExc_ValueError, - "expected ':' after format specifier"); + "expected ':' after conversion specifier"); return 0; } } } + format_spec->str = str->str; + format_spec->start = str->start; + count = 1; + while (str->start < str->end) { + switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) { + case '{': + *format_spec_needs_expanding = 1; + count++; + break; + case '}': + count--; + if (count == 0) { + format_spec->end = str->start - 1; + return 1; + } + break; + default: + break; + } + } + + PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec"); + return 0; + } + else if (c != '}') { + PyErr_SetString(PyExc_ValueError, "expected '}' before end of string"); + return 0; } - else - /* end of string, there's no format_spec or conversion */ - field_name->end = str->start; return 1; } @@ -638,10 +668,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, SubString *format_spec, Py_UCS4 *conversion, int *format_spec_needs_expanding) { - int at_end, hit_format_spec; + int at_end; Py_UCS4 c = 0; Py_ssize_t start; - int count; Py_ssize_t len; int markup_follows = 0; @@ -713,50 +742,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, if (!markup_follows) return 2; - /* this is markup, find the end of the string by counting nested - braces. note that this prohibits escaped braces, so that - format_specs cannot have braces in them. */ + /* this is markup; parse the field */ *field_present = 1; - count = 1; - - start = self->str.start; - - /* we know we can't have a zero length string, so don't worry - about that case */ - hit_format_spec = 0; - while (self->str.start < self->str.end) { - switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) { - case ':': - hit_format_spec = 1; - count = 1; - break; - case '{': - /* the format spec needs to be recursively expanded. - this is an optimization, and not strictly needed */ - if (hit_format_spec) - *format_spec_needs_expanding = 1; - count++; - break; - case '}': - count--; - if (count <= 0) { - /* we're done. parse and get out */ - SubString s; - - SubString_init(&s, self->str.str, start, self->str.start - 1); - if (parse_field(&s, field_name, format_spec, conversion) == 0) - return 0; - - /* success */ - return 2; - } - break; - } - } - - /* end of string while searching for matching '}' */ - PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); - return 0; + if (!parse_field(&self->str, field_name, format_spec, + format_spec_needs_expanding, conversion)) + return 0; + return 2; } @@ -875,25 +866,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, SubString literal; SubString field_name; SubString format_spec; - Py_UCS4 conversion, maxchar; - Py_ssize_t sublen; - int err; + Py_UCS4 conversion; MarkupIterator_init(&iter, input->str, input->start, input->end); while ((result = MarkupIterator_next(&iter, &literal, &field_present, &field_name, &format_spec, &conversion, &format_spec_needs_expanding)) == 2) { - sublen = literal.end - literal.start; - if (sublen) { - maxchar = _PyUnicode_FindMaxChar(literal.str, - literal.start, literal.end); - err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar); - if (err == -1) + if (literal.end != literal.start) { + if (!field_present && iter.str.start == iter.str.end) + writer->overallocate = 0; + if (_PyUnicodeWriter_WriteSubstring(writer, literal.str, + literal.start, literal.end) < 0) return 0; - _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, - literal.str, literal.start, sublen); - writer->pos += sublen; } if (field_present) { @@ -918,7 +903,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { _PyUnicodeWriter writer; - Py_ssize_t minlen; /* check the recursion level */ if (recursion_depth <= 0) { @@ -927,8 +911,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, return NULL; } - minlen = PyUnicode_GET_LENGTH(input->str) + 100; - _PyUnicodeWriter_Init(&writer, minlen); + _PyUnicodeWriter_Init(&writer); + writer.overallocate = 1; + writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100; if (!do_markup(input, args, kwargs, &writer, recursion_depth, auto_number)) { diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h index f16f21e..3db5629 100644 --- a/Objects/stringlib/unicodedefs.h +++ b/Objects/stringlib/unicodedefs.h @@ -21,17 +21,11 @@ #define STRINGLIB_STR PyUnicode_AS_UNICODE #define STRINGLIB_LEN PyUnicode_GET_SIZE #define STRINGLIB_NEW PyUnicode_FromUnicode -#define STRINGLIB_RESIZE PyUnicode_Resize #define STRINGLIB_CHECK PyUnicode_Check #define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact -#if PY_VERSION_HEX < 0x03000000 -#define STRINGLIB_TOSTR PyObject_Unicode -#define STRINGLIB_TOASCII PyObject_Repr -#else #define STRINGLIB_TOSTR PyObject_Str #define STRINGLIB_TOASCII PyObject_ASCII -#endif #define STRINGLIB_WANT_CONTAINS_OBJ 1 |