summaryrefslogtreecommitdiffstats
path: root/Objects/stringlib
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/stringlib')
-rw-r--r--Objects/stringlib/asciilib.h1
-rw-r--r--Objects/stringlib/codecs.h12
-rw-r--r--Objects/stringlib/eq.h4
-rw-r--r--Objects/stringlib/fastsearch.h8
-rw-r--r--Objects/stringlib/find_max_char.h4
-rw-r--r--Objects/stringlib/join.h133
-rw-r--r--Objects/stringlib/partition.h10
-rw-r--r--Objects/stringlib/replace.h53
-rw-r--r--Objects/stringlib/split.h4
-rw-r--r--Objects/stringlib/stringdefs.h1
-rw-r--r--Objects/stringlib/transmogrify.h8
-rw-r--r--Objects/stringlib/ucs1lib.h1
-rw-r--r--Objects/stringlib/ucs2lib.h1
-rw-r--r--Objects/stringlib/ucs4lib.h1
-rw-r--r--Objects/stringlib/undef.h1
-rw-r--r--Objects/stringlib/unicode_format.h139
-rw-r--r--Objects/stringlib/unicodedefs.h6
17 files changed, 280 insertions, 107 deletions
diff --git a/Objects/stringlib/asciilib.h b/Objects/stringlib/asciilib.h
index f62813d..d0fc18d 100644
--- a/Objects/stringlib/asciilib.h
+++ b/Objects/stringlib/asciilib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW(STR,LEN) _PyUnicode_FromASCII((char*)(STR),(LEN))
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index f353367..57319c6 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -38,8 +38,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
*/
if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
/* Help register allocation */
- register const char *_s = s;
- register STRINGLIB_CHAR *_p = p;
+ const char *_s = s;
+ STRINGLIB_CHAR *_p = p;
while (_s < aligned_end) {
/* Read a whole long at a time (either 4 or 8 bytes),
and do a fast unrolled copy if it only contains ASCII
@@ -47,7 +47,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
unsigned long value = *(unsigned long *) _s;
if (value & ASCII_CHAR_MASK)
break;
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
_p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
@@ -486,7 +486,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
const unsigned char *q = *inptr;
STRINGLIB_CHAR *p = dest + *outpos;
/* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
int ihi = !!native_ordering, ilo = !native_ordering;
#else
int ihi = !native_ordering, ilo = !!native_ordering;
@@ -499,7 +499,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
reads are more expensive, better to defer to another iteration. */
if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
/* Fast path for runs of in-range non-surrogate chars. */
- register const unsigned char *_q = q;
+ const unsigned char *_q = q;
while (_q < aligned_end) {
unsigned long block = * (unsigned long *) _q;
if (native_ordering) {
@@ -517,7 +517,7 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
block = SWAB(block);
#endif
}
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+#if PY_LITTLE_ENDIAN
# if SIZEOF_LONG == 4
p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
p[1] = (STRINGLIB_CHAR)(block >> 16);
diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h
index 3e5f510..f8fd384 100644
--- a/Objects/stringlib/eq.h
+++ b/Objects/stringlib/eq.h
@@ -6,8 +6,8 @@
Py_LOCAL_INLINE(int)
unicode_eq(PyObject *aa, PyObject *bb)
{
- register PyUnicodeObject *a = (PyUnicodeObject *)aa;
- register PyUnicodeObject *b = (PyUnicodeObject *)bb;
+ PyUnicodeObject *a = (PyUnicodeObject *)aa;
+ PyUnicodeObject *b = (PyUnicodeObject *)bb;
if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) {
assert(0 && "unicode_eq ready fail");
diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h
index 55ac77d..cd7cac4 100644
--- a/Objects/stringlib/fastsearch.h
+++ b/Objects/stringlib/fastsearch.h
@@ -142,6 +142,8 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
mask = 0;
if (mode != FAST_RSEARCH) {
+ const STRINGLIB_CHAR *ss = s + m - 1;
+ const STRINGLIB_CHAR *pp = p + m - 1;
/* create compressed boyer-moore delta 1 table */
@@ -156,7 +158,7 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
for (i = 0; i <= w; i++) {
/* note: using mlast in the skip path slows things down on x86 */
- if (s[i+m-1] == p[m-1]) {
+ if (ss[i] == pp[0]) {
/* candidate match */
for (j = 0; j < mlast; j++)
if (s[i+j] != p[j])
@@ -172,13 +174,13 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
continue;
}
/* miss: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, s[i+m]))
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
else
i = i + skip;
} else {
/* skip: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, s[i+m]))
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
i = i + m;
}
}
diff --git a/Objects/stringlib/find_max_char.h b/Objects/stringlib/find_max_char.h
index 06559c8..eb3fe88 100644
--- a/Objects/stringlib/find_max_char.h
+++ b/Objects/stringlib/find_max_char.h
@@ -24,7 +24,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
while (p < end) {
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
/* Help register allocation */
- register const unsigned char *_p = p;
+ const unsigned char *_p = p;
while (_p < aligned_end) {
unsigned long value = *(unsigned long *) _p;
if (value & UCS1_ASCII_CHAR_MASK)
@@ -66,7 +66,7 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
#else
#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
#endif
- register Py_UCS4 mask;
+ Py_UCS4 mask;
Py_ssize_t n = end - begin;
const STRINGLIB_CHAR *p = begin;
const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);
diff --git a/Objects/stringlib/join.h b/Objects/stringlib/join.h
new file mode 100644
index 0000000..5568b31
--- /dev/null
+++ b/Objects/stringlib/join.h
@@ -0,0 +1,133 @@
+/* stringlib: bytes joining implementation */
+
+#if STRINGLIB_SIZEOF_CHAR != 1
+#error join.h only compatible with byte-wise strings
+#endif
+
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
+{
+ char *sepstr = STRINGLIB_STR(sep);
+ const Py_ssize_t seplen = STRINGLIB_LEN(sep);
+ PyObject *res = NULL;
+ char *p;
+ Py_ssize_t seqlen = 0;
+ Py_ssize_t sz = 0;
+ Py_ssize_t i, nbufs;
+ PyObject *seq, *item;
+ Py_buffer *buffers = NULL;
+#define NB_STATIC_BUFFERS 10
+ Py_buffer static_buffers[NB_STATIC_BUFFERS];
+
+ seq = PySequence_Fast(iterable, "can only join an iterable");
+ if (seq == NULL) {
+ return NULL;
+ }
+
+ seqlen = PySequence_Fast_GET_SIZE(seq);
+ if (seqlen == 0) {
+ Py_DECREF(seq);
+ return STRINGLIB_NEW(NULL, 0);
+ }
+#ifndef STRINGLIB_MUTABLE
+ if (seqlen == 1) {
+ item = PySequence_Fast_GET_ITEM(seq, 0);
+ if (STRINGLIB_CHECK_EXACT(item)) {
+ Py_INCREF(item);
+ Py_DECREF(seq);
+ return item;
+ }
+ }
+#endif
+ if (seqlen > NB_STATIC_BUFFERS) {
+ buffers = PyMem_NEW(Py_buffer, seqlen);
+ if (buffers == NULL) {
+ Py_DECREF(seq);
+ PyErr_NoMemory();
+ return NULL;
+ }
+ }
+ else {
+ buffers = static_buffers;
+ }
+
+ /* Here is the general case. Do a pre-pass to figure out the total
+ * amount of space we'll need (sz), and see whether all arguments are
+ * buffer-compatible.
+ */
+ for (i = 0, nbufs = 0; i < seqlen; i++) {
+ Py_ssize_t itemlen;
+ item = PySequence_Fast_GET_ITEM(seq, i);
+ if (_getbuffer(item, &buffers[i]) < 0) {
+ PyErr_Format(PyExc_TypeError,
+ "sequence item %zd: expected bytes, bytearray, "
+ "or an object with the buffer interface, %.80s found",
+ i, Py_TYPE(item)->tp_name);
+ goto error;
+ }
+ nbufs = i + 1; /* for error cleanup */
+ itemlen = buffers[i].len;
+ if (itemlen > PY_SSIZE_T_MAX - sz) {
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += itemlen;
+ if (i != 0) {
+ if (seplen > PY_SSIZE_T_MAX - sz) {
+ PyErr_SetString(PyExc_OverflowError,
+ "join() result is too long");
+ goto error;
+ }
+ sz += seplen;
+ }
+ if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "sequence changed size during iteration");
+ goto error;
+ }
+ }
+
+ /* Allocate result space. */
+ res = STRINGLIB_NEW(NULL, sz);
+ if (res == NULL)
+ goto error;
+
+ /* Catenate everything. */
+ p = STRINGLIB_STR(res);
+ if (!seplen) {
+ /* fast path */
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n = buffers[i].len;
+ char *q = buffers[i].buf;
+ Py_MEMCPY(p, q, n);
+ p += n;
+ }
+ goto done;
+ }
+ for (i = 0; i < nbufs; i++) {
+ Py_ssize_t n;
+ char *q;
+ if (i) {
+ Py_MEMCPY(p, sepstr, seplen);
+ p += seplen;
+ }
+ n = buffers[i].len;
+ q = buffers[i].buf;
+ Py_MEMCPY(p, q, n);
+ p += n;
+ }
+ goto done;
+
+error:
+ res = NULL;
+done:
+ Py_DECREF(seq);
+ for (i = 0; i < nbufs; i++)
+ PyBuffer_Release(&buffers[i]);
+ if (buffers != static_buffers)
+ PyMem_FREE(buffers);
+ return res;
+}
+
+#undef NB_STATIC_BUFFERS
diff --git a/Objects/stringlib/partition.h b/Objects/stringlib/partition.h
index 40cb512..ed32a6f 100644
--- a/Objects/stringlib/partition.h
+++ b/Objects/stringlib/partition.h
@@ -29,6 +29,11 @@ STRINGLIB(partition)(PyObject* str_obj,
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
+
+ if (PyErr_Occurred()) {
+ Py_DECREF(out);
+ return NULL;
+ }
#else
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
@@ -79,6 +84,11 @@ STRINGLIB(rpartition)(PyObject* str_obj,
PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
+
+ if (PyErr_Occurred()) {
+ Py_DECREF(out);
+ return NULL;
+ }
#else
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
diff --git a/Objects/stringlib/replace.h b/Objects/stringlib/replace.h
new file mode 100644
index 0000000..ef318ed
--- /dev/null
+++ b/Objects/stringlib/replace.h
@@ -0,0 +1,53 @@
+/* stringlib: replace implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+Py_LOCAL_INLINE(void)
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+ Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+ *s = u2;
+ while (--maxcount && ++s != end) {
+ /* Find the next character to be replaced.
+
+ If it occurs often, it is faster to scan for it using an inline
+ loop. If it occurs seldom, it is faster to scan for it using a
+ function call; the overhead of the function call is amortized
+ across the many characters that call covers. We start with an
+ inline loop and use a heuristic to determine whether to fall back
+ to a function call. */
+ if (*s != u1) {
+ int attempts = 10;
+ /* search u1 in a dummy loop */
+ while (1) {
+ if (++s == end)
+ return;
+ if (*s == u1)
+ break;
+ if (!--attempts) {
+ /* if u1 was not found for attempts iterations,
+ use FASTSEARCH() or memchr() */
+#if STRINGLIB_SIZEOF_CHAR == 1
+ s++;
+ s = memchr(s, u1, end - s);
+ if (s == NULL)
+ return;
+#else
+ Py_ssize_t i;
+ STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
+ s++;
+ i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
+ if (i < 0)
+ return;
+ s += i;
+#endif
+ /* restart the dummy loop */
+ break;
+ }
+ }
+ }
+ *s = u2;
+ }
+}
diff --git a/Objects/stringlib/split.h b/Objects/stringlib/split.h
index 947dd28..31f77a7 100644
--- a/Objects/stringlib/split.h
+++ b/Objects/stringlib/split.h
@@ -345,8 +345,8 @@ STRINGLIB(splitlines)(PyObject* str_obj,
and the appends only done when the prealloc buffer is full.
That's too much work for little gain.*/
- register Py_ssize_t i;
- register Py_ssize_t j;
+ Py_ssize_t i;
+ Py_ssize_t j;
PyObject *list = PyList_New(0);
PyObject *sub;
diff --git a/Objects/stringlib/stringdefs.h b/Objects/stringlib/stringdefs.h
index 7bb91a7..ce27f3e 100644
--- a/Objects/stringlib/stringdefs.h
+++ b/Objects/stringlib/stringdefs.h
@@ -21,7 +21,6 @@
#define STRINGLIB_STR PyBytes_AS_STRING
#define STRINGLIB_LEN PyBytes_GET_SIZE
#define STRINGLIB_NEW PyBytes_FromStringAndSize
-#define STRINGLIB_RESIZE _PyBytes_Resize
#define STRINGLIB_CHECK PyBytes_Check
#define STRINGLIB_CHECK_EXACT PyBytes_CheckExact
#define STRINGLIB_TOSTR PyObject_Str
diff --git a/Objects/stringlib/transmogrify.h b/Objects/stringlib/transmogrify.h
index 90fa129..dd00976 100644
--- a/Objects/stringlib/transmogrify.h
+++ b/Objects/stringlib/transmogrify.h
@@ -5,21 +5,23 @@
shared code in bytes_methods.c to cut down on duplicate code bloat. */
PyDoc_STRVAR(expandtabs__doc__,
-"B.expandtabs([tabsize]) -> copy of B\n\
+"B.expandtabs(tabsize=8) -> copy of B\n\
\n\
Return a copy of B where all tab characters are expanded using spaces.\n\
If tabsize is not given, a tab size of 8 characters is assumed.");
static PyObject*
-stringlib_expandtabs(PyObject *self, PyObject *args)
+stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds)
{
const char *e, *p;
char *q;
size_t i, j;
PyObject *u;
+ static char *kwlist[] = {"tabsize", 0};
int tabsize = 8;
- if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i:expandtabs",
+ kwlist, &tabsize))
return NULL;
/* First pass: determine size of output string */
diff --git a/Objects/stringlib/ucs1lib.h b/Objects/stringlib/ucs1lib.h
index e8c6fcb..ce1eb57 100644
--- a/Objects/stringlib/ucs1lib.h
+++ b/Objects/stringlib/ucs1lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_1BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS1
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/ucs2lib.h b/Objects/stringlib/ucs2lib.h
index 45e5729..f900cb6 100644
--- a/Objects/stringlib/ucs2lib.h
+++ b/Objects/stringlib/ucs2lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_2BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS2
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/ucs4lib.h b/Objects/stringlib/ucs4lib.h
index 647a27e..86a480f 100644
--- a/Objects/stringlib/ucs4lib.h
+++ b/Objects/stringlib/ucs4lib.h
@@ -19,7 +19,6 @@
#define STRINGLIB_STR PyUnicode_4BYTE_DATA
#define STRINGLIB_LEN PyUnicode_GET_LENGTH
#define STRINGLIB_NEW _PyUnicode_FromUCS4
-#define STRINGLIB_RESIZE not_supported
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
diff --git a/Objects/stringlib/undef.h b/Objects/stringlib/undef.h
index 03117ec..f9d3f1d 100644
--- a/Objects/stringlib/undef.h
+++ b/Objects/stringlib/undef.h
@@ -6,7 +6,6 @@
#undef STRINGLIB_STR
#undef STRINGLIB_LEN
#undef STRINGLIB_NEW
-#undef STRINGLIB_RESIZE
#undef _Py_InsertThousandsGrouping
#undef STRINGLIB_IS_UNICODE
diff --git a/Objects/stringlib/unicode_format.h b/Objects/stringlib/unicode_format.h
index c1c2cf3..aec221a 100644
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@@ -543,7 +543,7 @@ done:
static int
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
- Py_UCS4 *conversion)
+ int *format_spec_needs_expanding, Py_UCS4 *conversion)
{
/* Note this function works if the field name is zero length,
which is good. Zero length field names are handled later, in
@@ -561,6 +561,15 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
field_name->start = str->start;
while (str->start < str->end) {
switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+ case '{':
+ PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
+ return 0;
+ case '[':
+ for (; str->start < str->end; str->start++)
+ if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
+ break;
+ continue;
+ case '}':
case ':':
case '!':
break;
@@ -570,41 +579,62 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
break;
}
+ field_name->end = str->start - 1;
if (c == '!' || c == ':') {
+ Py_ssize_t count;
/* we have a format specifier and/or a conversion */
/* don't include the last character */
- field_name->end = str->start-1;
-
- /* the format specifier is the rest of the string */
- format_spec->str = str->str;
- format_spec->start = str->start;
- format_spec->end = str->end;
/* see if there's a conversion specifier */
if (c == '!') {
/* there must be another character present */
- if (format_spec->start >= format_spec->end) {
+ if (str->start >= str->end) {
PyErr_SetString(PyExc_ValueError,
- "end of format while looking for conversion "
+ "end of string while looking for conversion "
"specifier");
return 0;
}
- *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+ *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
- /* if there is another character, it must be a colon */
- if (format_spec->start < format_spec->end) {
- c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+ if (str->start < str->end) {
+ c = PyUnicode_READ_CHAR(str->str, str->start++);
+ if (c == '}')
+ return 1;
if (c != ':') {
PyErr_SetString(PyExc_ValueError,
- "expected ':' after format specifier");
+ "expected ':' after conversion specifier");
return 0;
}
}
}
+ format_spec->str = str->str;
+ format_spec->start = str->start;
+ count = 1;
+ while (str->start < str->end) {
+ switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+ case '{':
+ *format_spec_needs_expanding = 1;
+ count++;
+ break;
+ case '}':
+ count--;
+ if (count == 0) {
+ format_spec->end = str->start - 1;
+ return 1;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
+ return 0;
+ }
+ else if (c != '}') {
+ PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
+ return 0;
}
- else
- /* end of string, there's no format_spec or conversion */
- field_name->end = str->start;
return 1;
}
@@ -638,10 +668,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
SubString *format_spec, Py_UCS4 *conversion,
int *format_spec_needs_expanding)
{
- int at_end, hit_format_spec;
+ int at_end;
Py_UCS4 c = 0;
Py_ssize_t start;
- int count;
Py_ssize_t len;
int markup_follows = 0;
@@ -713,50 +742,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
if (!markup_follows)
return 2;
- /* this is markup, find the end of the string by counting nested
- braces. note that this prohibits escaped braces, so that
- format_specs cannot have braces in them. */
+ /* this is markup; parse the field */
*field_present = 1;
- count = 1;
-
- start = self->str.start;
-
- /* we know we can't have a zero length string, so don't worry
- about that case */
- hit_format_spec = 0;
- while (self->str.start < self->str.end) {
- switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
- case ':':
- hit_format_spec = 1;
- count = 1;
- break;
- case '{':
- /* the format spec needs to be recursively expanded.
- this is an optimization, and not strictly needed */
- if (hit_format_spec)
- *format_spec_needs_expanding = 1;
- count++;
- break;
- case '}':
- count--;
- if (count <= 0) {
- /* we're done. parse and get out */
- SubString s;
-
- SubString_init(&s, self->str.str, start, self->str.start - 1);
- if (parse_field(&s, field_name, format_spec, conversion) == 0)
- return 0;
-
- /* success */
- return 2;
- }
- break;
- }
- }
-
- /* end of string while searching for matching '}' */
- PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
- return 0;
+ if (!parse_field(&self->str, field_name, format_spec,
+ format_spec_needs_expanding, conversion))
+ return 0;
+ return 2;
}
@@ -875,25 +866,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
SubString literal;
SubString field_name;
SubString format_spec;
- Py_UCS4 conversion, maxchar;
- Py_ssize_t sublen;
- int err;
+ Py_UCS4 conversion;
MarkupIterator_init(&iter, input->str, input->start, input->end);
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
&field_name, &format_spec,
&conversion,
&format_spec_needs_expanding)) == 2) {
- sublen = literal.end - literal.start;
- if (sublen) {
- maxchar = _PyUnicode_FindMaxChar(literal.str,
- literal.start, literal.end);
- err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
- if (err == -1)
+ if (literal.end != literal.start) {
+ if (!field_present && iter.str.start == iter.str.end)
+ writer->overallocate = 0;
+ if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+ literal.start, literal.end) < 0)
return 0;
- _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
- literal.str, literal.start, sublen);
- writer->pos += sublen;
}
if (field_present) {
@@ -918,7 +903,6 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
int recursion_depth, AutoNumber *auto_number)
{
_PyUnicodeWriter writer;
- Py_ssize_t minlen;
/* check the recursion level */
if (recursion_depth <= 0) {
@@ -927,8 +911,9 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
return NULL;
}
- minlen = PyUnicode_GET_LENGTH(input->str) + 100;
- _PyUnicodeWriter_Init(&writer, minlen);
+ _PyUnicodeWriter_Init(&writer);
+ writer.overallocate = 1;
+ writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
if (!do_markup(input, args, kwargs, &writer, recursion_depth,
auto_number)) {
diff --git a/Objects/stringlib/unicodedefs.h b/Objects/stringlib/unicodedefs.h
index f16f21e..3db5629 100644
--- a/Objects/stringlib/unicodedefs.h
+++ b/Objects/stringlib/unicodedefs.h
@@ -21,17 +21,11 @@
#define STRINGLIB_STR PyUnicode_AS_UNICODE
#define STRINGLIB_LEN PyUnicode_GET_SIZE
#define STRINGLIB_NEW PyUnicode_FromUnicode
-#define STRINGLIB_RESIZE PyUnicode_Resize
#define STRINGLIB_CHECK PyUnicode_Check
#define STRINGLIB_CHECK_EXACT PyUnicode_CheckExact
-#if PY_VERSION_HEX < 0x03000000
-#define STRINGLIB_TOSTR PyObject_Unicode
-#define STRINGLIB_TOASCII PyObject_Repr
-#else
#define STRINGLIB_TOSTR PyObject_Str
#define STRINGLIB_TOASCII PyObject_ASCII
-#endif
#define STRINGLIB_WANT_CONTAINS_OBJ 1