diff options
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/stringlib/string_format.h | 312 |
1 files changed, 157 insertions, 155 deletions
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h index b8ea03f..dbd01d3 100644 --- a/Objects/stringlib/string_format.h +++ b/Objects/stringlib/string_format.h @@ -48,9 +48,24 @@ SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) str->end = str->ptr + len; } +/* return a new string. if str->ptr is NULL, return None */ Py_LOCAL_INLINE(PyObject *) SubString_new_object(SubString *str) { + if (str->ptr == NULL) { + Py_INCREF(Py_None); + return Py_None; + } + return STRINGLIB_NEW(str->ptr, str->end - str->ptr); +} + +/* return a new string. if str->ptr is NULL, return an empty (zero length) string instead of None */ +Py_LOCAL_INLINE(PyObject *) +SubString_new_object_or_empty(SubString *str) +{ + if (str->ptr == NULL) { + return STRINGLIB_NEW(NULL, 0); + } return STRINGLIB_NEW(str->ptr, str->end - str->ptr); } @@ -481,7 +496,7 @@ format(PyObject *fieldobj, SubString *format_spec) return NULL; /* we need to create an object out of the pointers we have */ - spec = SubString_new_object(format_spec); + spec = SubString_new_object_or_empty(format_spec); if (spec == NULL) goto done; @@ -609,21 +624,19 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec, typedef struct { SubString str; - int in_markup; } MarkupIterator; static int MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) { SubString_init(&self->str, ptr, len); - self->in_markup = 0; return 1; } /* returns 0 on error, 1 on non-error termination, and 2 if it got a string (or something to be expanded) */ static int -MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal, +MarkupIterator_next(MarkupIterator *self, SubString *literal, SubString *field_name, SubString *format_spec, STRINGLIB_CHAR *conversion, int *format_spec_needs_expanding) @@ -633,101 +646,116 @@ MarkupIterator_next(MarkupIterator *self, int *is_markup, SubString *literal, STRINGLIB_CHAR *start; int count; Py_ssize_t len; + int markup_follows = 0; + /* initialize all of the 
output variables */ + SubString_init(literal, NULL, 0); + SubString_init(field_name, NULL, 0); + SubString_init(format_spec, NULL, 0); + *conversion = '\0'; *format_spec_needs_expanding = 0; - /* no more input, end of iterator */ + /* No more input, end of iterator. This is the normal exit + path. */ if (self->str.ptr >= self->str.end) return 1; - *is_markup = self->in_markup; start = self->str.ptr; - if (self->in_markup) { - - /* prepare for next iteration */ - self->in_markup = 0; - - /* this is markup, find the end of the string by counting nested - braces. note that this prohibits escaped braces, so that - format_specs cannot have braces in them. */ - count = 1; - - /* we know we can't have a zero length string, so don't worry - about that case */ - while (self->str.ptr < self->str.end) { - switch (c = *(self->str.ptr++)) { - case '{': - /* the format spec needs to be recursively expanded. - this is an optimization, and not strictly needed */ - *format_spec_needs_expanding = 1; - count++; - break; - case '}': - count--; - if (count <= 0) { - /* we're done. parse and get out */ - literal->ptr = start; - literal->end = self->str.ptr-1; - - if (parse_field(literal, field_name, format_spec, - conversion) == 0) - return 0; - - /* success */ - return 2; - } - break; - } + /* First read any literal text. Read until the end of string, an + escaped '{' or '}', or an unescaped '{'. In order to never + allocate memory and so I can just pass pointers around, if + there's an escaped '{' or '}' then we'll return the literal + including the brace, but no format object. The next time + through, we'll return the rest of the literal, skipping past + the second consecutive brace. 
*/ + while (self->str.ptr < self->str.end) { + switch (c = *(self->str.ptr++)) { + case '{': + case '}': + markup_follows = 1; + break; + default: + continue; } - /* end of string while searching for matching '}' */ - PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); - return 0; + break; + } + at_end = self->str.ptr >= self->str.end; + len = self->str.ptr - start; + + if ((c == '}') && (at_end || (c != *self->str.ptr))) { + PyErr_SetString(PyExc_ValueError, "Single '}' encountered " + "in format string"); + return 0; } - else { - /* literal text, read until the end of string, an escaped { or }, - or an unescaped { */ - while (self->str.ptr < self->str.end) { - switch (c = *(self->str.ptr++)) { - case '{': - case '}': - self->in_markup = 1; - break; - default: - continue; - } - break; + if (at_end && c == '{') { + PyErr_SetString(PyExc_ValueError, "Single '{' encountered " + "in format string"); + return 0; + } + if (!at_end) { + if (c == *self->str.ptr) { + /* escaped } or {, skip it in the input. there is no + markup object following us, just this literal text */ + self->str.ptr++; + markup_follows = 0; } + else + len--; + } - at_end = self->str.ptr >= self->str.end; - len = self->str.ptr - start; + /* record the literal text */ + literal->ptr = start; + literal->end = start + len; - if ((c == '}') && (at_end || (c != *self->str.ptr))) { - PyErr_SetString(PyExc_ValueError, "Single '}' encountered " - "in format string"); - return 0; - } - if (at_end && c == '{') { - PyErr_SetString(PyExc_ValueError, "Single '{' encountered " - "in format string"); - return 0; - } - if (!at_end) { - if (c == *self->str.ptr) { - /* escaped } or {, skip it in the input */ - self->str.ptr++; - self->in_markup = 0; + if (!markup_follows) + return 2; + + /* this is markup, find the end of the string by counting nested + braces. note that this prohibits escaped braces, so that + format_specs cannot have braces in them. 
*/ + count = 1; + + start = self->str.ptr; + + /* we know we can't have a zero length string, so don't worry + about that case */ + while (self->str.ptr < self->str.end) { + switch (c = *(self->str.ptr++)) { + case '{': + /* the format spec needs to be recursively expanded. + this is an optimization, and not strictly needed */ + *format_spec_needs_expanding = 1; + count++; + break; + case '}': + count--; + if (count <= 0) { + /* we're done. parse and get out */ + SubString s; + + SubString_init(&s, start, self->str.ptr - 1 - start); + if (parse_field(&s, field_name, format_spec, conversion) == 0) + return 0; + + /* a zero length field_name is an error */ + if (field_name->ptr == field_name->end) { + PyErr_SetString(PyExc_ValueError, "zero length field name " + "in format"); + return 0; + } + + /* success */ + return 2; } - else - len--; + break; } - - /* this is just plain text, return it */ - literal->ptr = start; - literal->end = start + len; - return 2; } + + /* end of string while searching for matching '}' */ + PyErr_SetString(PyExc_ValueError, "unmatched '{' in format"); + return 0; } @@ -826,27 +854,24 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, OutputString *output, int *recursion_level) { MarkupIterator iter; - int is_markup; int format_spec_needs_expanding; int result; - SubString str; + SubString literal; SubString field_name; SubString format_spec; STRINGLIB_CHAR conversion; MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); - while ((result = MarkupIterator_next(&iter, &is_markup, &str, &field_name, + while ((result = MarkupIterator_next(&iter, &literal, &field_name, &format_spec, &conversion, &format_spec_needs_expanding)) == 2) { - if (is_markup) { + if (!output_data(output, literal.ptr, literal.end - literal.ptr)) + return 0; + if (field_name.ptr != field_name.end) if (!output_markup(&field_name, &format_spec, format_spec_needs_expanding, conversion, output, args, kwargs, recursion_level)) return 0; - } - else - if 
(!output_data(output, str.ptr, str.end-str.ptr)) - return 0; } return result; } @@ -947,17 +972,12 @@ formatteriter_dealloc(formatteriterobject *it) } /* returns a tuple: - (is_markup, literal, field_name, format_spec, conversion) - if is_markup == True: - literal is None - field_name is the string before the ':' - format_spec is the string after the ':' - conversion is either None, or the string after the '!' - if is_markup == False: - literal is the literal string - field_name is None - format_spec is None - conversion is None + (literal, field_name, format_spec, conversion) + + literal is any literal text to output. might be zero length + field_name is the string before the ':'. might be None + format_spec is the string after the ':'. might be None + conversion is either None, or the string after the '!' */ static PyObject * formatteriter_next(formatteriterobject *it) @@ -966,10 +986,9 @@ formatteriter_next(formatteriterobject *it) SubString field_name; SubString format_spec; Py_UNICODE conversion; - int is_markup; int format_spec_needs_expanding; - int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal, - &field_name, &format_spec, &conversion, + int result = MarkupIterator_next(&it->it_markup, &literal, &field_name, + &format_spec, &conversion, &format_spec_needs_expanding); /* all of the SubString objects point into it->str, so no @@ -984,50 +1003,39 @@ formatteriter_next(formatteriterobject *it) PyObject *format_spec_str = NULL; PyObject *conversion_str = NULL; PyObject *tuple = NULL; + int has_field = field_name.ptr != field_name.end; - if (is_markup) { - /* field_name, format_spec, and conversion are returned */ - literal_str = Py_None; - Py_INCREF(literal_str); - - field_name_str = SubString_new_object(&field_name); - if (field_name_str == NULL) - goto error; - - format_spec_str = SubString_new_object(&format_spec); - if (format_spec_str == NULL) - goto error; - - /* if the conversion is not specified, return a None, - otherwise create a 
one length string with the - conversion characater */ - if (conversion == '\0') { - conversion_str = Py_None; - Py_INCREF(conversion_str); - } - else - conversion_str = PyUnicode_FromUnicode(&conversion, - 1); - if (conversion_str == NULL) - goto error; - } - else { - /* only literal is returned */ - literal_str = SubString_new_object(&literal); - if (literal_str == NULL) - goto error; - - field_name_str = Py_None; - format_spec_str = Py_None; - conversion_str = Py_None; + literal_str = SubString_new_object(&literal); + if (literal_str == NULL) + goto done; + + field_name_str = SubString_new_object(&field_name); + if (field_name_str == NULL) + goto done; + + /* if field_name is non-zero length, return a string for + format_spec (even if zero length), else return None */ + format_spec_str = (has_field ? + SubString_new_object_or_empty : + SubString_new_object)(&format_spec); + if (format_spec_str == NULL) + goto done; - Py_INCREF(field_name_str); - Py_INCREF(format_spec_str); + /* if the conversion is not specified, return a None, + otherwise create a one length string with the conversion + character */ + if (conversion == '\0') { + conversion_str = Py_None; Py_INCREF(conversion_str); } + else + conversion_str = PyUnicode_FromUnicode(&conversion, 1); + if (conversion_str == NULL) + goto done; + tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str, conversion_str); - error: + done: Py_XDECREF(literal_str); Py_XDECREF(field_name_str); Py_XDECREF(format_spec_str); @@ -1149,7 +1157,7 @@ fieldnameiter_next(fieldnameiterobject *it) is_attr_obj = PyBool_FromLong(is_attr); if (is_attr_obj == NULL) - goto error; + goto done; /* either an integer or a string */ if (idx != -1) @@ -1157,22 +1165,16 @@ fieldnameiter_next(fieldnameiterobject *it) else obj = SubString_new_object(&name); if (obj == NULL) - goto error; + goto done; /* return a tuple of values */ result = PyTuple_Pack(2, is_attr_obj, obj); - if (result == NULL) - goto error; - return result; - - 
error: - Py_XDECREF(result); + done: Py_XDECREF(is_attr_obj); Py_XDECREF(obj); - return NULL; + return result; } - return NULL; } static PyMethodDef fieldnameiter_methods[] = { @@ -1240,7 +1242,7 @@ formatter_field_name_split(PyUnicodeObject *self) if (!field_name_split(STRINGLIB_STR(self), STRINGLIB_LEN(self), &first, &first_idx, &it->it_field)) - goto error; + goto done; /* first becomes an integer, if possible; else a string */ if (first_idx != -1) @@ -1249,12 +1251,12 @@ formatter_field_name_split(PyUnicodeObject *self) /* convert "first" into a string object */ first_obj = SubString_new_object(&first); if (first_obj == NULL) - goto error; + goto done; /* return a tuple of values */ result = PyTuple_Pack(2, first_obj, it); -error: +done: Py_XDECREF(it); Py_XDECREF(first_obj); return result; |