diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 681 |
1 files changed, 344 insertions, 337 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 84f55b5..c46da45 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -598,7 +598,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs) */ if ((*f == 'l' || *f == 'z') && (f[1] == 'd' || f[1] == 'u')) - ++f; + ++f; switch (*f) { case 'c': @@ -7981,6 +7981,347 @@ unicode__format__(PyObject *self, PyObject *args) } +/********************* Formatter Iterator ************************/ + +/* this is used to implement string.Formatter.vparse(). it exists so + Formatter can share code with the built in unicode.format() + method */ + +typedef struct { + PyObject_HEAD + + PyUnicodeObject *str; + + MarkupIterator it_markup; +} formatteriterobject; + +static void +formatteriter_dealloc(formatteriterobject *it) +{ + Py_XDECREF(it->str); + PyObject_FREE(it); +} + +/* returns a tuple: + (is_markup, literal, field_name, format_spec, conversion) + if is_markup == True: + literal is None + field_name is the string before the ':' + format_spec is the string after the ':' + conversion is either None, or the string after the '!' 
+ if is_markup == False: + literal is the literal string + field_name is None + format_spec is None + conversion is None +*/ +static PyObject * +formatteriter_next(formatteriterobject *it) +{ + SubString literal; + SubString field_name; + SubString format_spec; + Py_UNICODE conversion; + int is_markup; + int format_spec_needs_expanding; + int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal, + &field_name, &format_spec, &conversion, + &format_spec_needs_expanding); + + /* all of the SubString objects point into it->str, so no + memory management needs to be done on them */ + assert(0 <= result && result <= 2); + if (result == 0) { + /* error has already been set */ + return NULL; + } else if (result == 1) { + /* end of iterator */ + return NULL; + } else { + PyObject *is_markup_bool = NULL; + PyObject *literal_str = NULL; + PyObject *field_name_str = NULL; + PyObject *format_spec_str = NULL; + PyObject *conversion_str = NULL; + PyObject *tuple = NULL; + + is_markup_bool = PyBool_FromLong(is_markup); + if (!is_markup_bool) + return NULL; + + if (is_markup) { + /* field_name, format_spec, and conversion are + returned */ + literal_str = Py_None; + Py_INCREF(literal_str); + + field_name_str = SubString_new_object(&field_name); + if (field_name_str == NULL) + goto error; + + format_spec_str = SubString_new_object(&format_spec); + if (format_spec_str == NULL) + goto error; + + /* if the conversion is not specified, return + a None, otherwise create a one length + string with the conversion character */ + if (conversion == '\0') { + conversion_str = Py_None; + Py_INCREF(conversion_str); + } else + conversion_str = PyUnicode_FromUnicode(&conversion, + 1); + if (conversion_str == NULL) + goto error; + } else { + /* only literal is returned */ + literal_str = SubString_new_object(&literal); + if (literal_str == NULL) + goto error; + + field_name_str = Py_None; + format_spec_str = Py_None; + conversion_str = Py_None; + + Py_INCREF(field_name_str); + 
Py_INCREF(format_spec_str); + Py_INCREF(conversion_str); + } + tuple = PyTuple_Pack(5, is_markup_bool, literal_str, + field_name_str, format_spec_str, + conversion_str); + error: + Py_XDECREF(is_markup_bool); + Py_XDECREF(literal_str); + Py_XDECREF(field_name_str); + Py_XDECREF(format_spec_str); + Py_XDECREF(conversion_str); + return tuple; + } +} + +static PyMethodDef formatteriter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +PyTypeObject PyFormatterIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "formatteriterator", /* tp_name */ + sizeof(formatteriterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)formatteriter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)formatteriter_next, /* tp_iternext */ + formatteriter_methods, /* tp_methods */ + 0, +}; + +/* unicode_formatter_parser is used to implement + string.Formatter.vformat. it parses a string and returns tuples + describing the parsed elements. 
It's a wrapper around + stringlib/string_format.h's MarkupIterator */ +static PyObject * +unicode_formatter_parser(PyUnicodeObject *self) +{ + formatteriterobject *it; + + it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); + if (it == NULL) + return NULL; + + /* take ownership, give the object to the iterator */ + Py_INCREF(self); + it->str = self; + + /* initialize the contained MarkupIterator */ + MarkupIterator_init(&it->it_markup, + PyUnicode_AS_UNICODE(self), + PyUnicode_GET_SIZE(self)); + + return (PyObject *)it; +} +/***************** end Formatter Iterator ************************/ +/********************* FieldName Iterator ************************/ + +/* this is used to implement string.Formatter.vparse(). it parses + the field name into attribute and item values. */ + +typedef struct { + PyObject_HEAD + + PyUnicodeObject *str; + + FieldNameIterator it_field; +} fieldnameiterobject; + +static void +fieldnameiter_dealloc(fieldnameiterobject *it) +{ + Py_XDECREF(it->str); + PyObject_FREE(it); +} + +/* returns a tuple: + (is_attr, value) + is_attr is true if we used attribute syntax (e.g., '.foo') + false if we used index syntax (e.g., '[foo]') + value is an integer or string +*/ +static PyObject * +fieldnameiter_next(fieldnameiterobject *it) +{ + int result; + int is_attr; + Py_ssize_t idx; + SubString name; + + result = FieldNameIterator_next(&it->it_field, &is_attr, + &idx, &name); + if (result == 0 || result == 1) { + /* if 0, error has already been set, if 1, iterator is empty */ + return NULL; + } else { + PyObject* result = NULL; + PyObject* is_attr_obj = NULL; + PyObject* obj = NULL; + + is_attr_obj = PyBool_FromLong(is_attr); + if (is_attr_obj == NULL) + goto error; + + /* either an integer or a string */ + if (idx != -1) + obj = PyInt_FromSsize_t(idx); + else + obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr); + if (obj == NULL) + goto error; + + /* return a tuple of values */ + result = PyTuple_Pack(2, is_attr_obj, obj); + if (result 
== NULL) + goto error; + + return result; + + error: + Py_XDECREF(result); + Py_XDECREF(is_attr_obj); + Py_XDECREF(obj); + return NULL; + } + return NULL; +} + +static PyMethodDef fieldnameiter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +static PyTypeObject PyFieldNameIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "fieldnameiterator", /* tp_name */ + sizeof(fieldnameiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)fieldnameiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)fieldnameiter_next, /* tp_iternext */ + fieldnameiter_methods, /* tp_methods */ + 0}; + +/* unicode_formatter_field_name_split is used to implement + string.Formatter.vformat. it takes a PEP 3101 "field name", and + returns a tuple of (first, rest): "first", the part before the + first '.' or '['; and "rest", an iterator for the rest of the field + name. it's a wrapper around stringlib/string_format.h's + field_name_split. The iterator it returns is a + FieldNameIterator */ +static PyObject * +unicode_formatter_field_name_split(PyUnicodeObject *self) +{ + SubString first; + Py_ssize_t first_idx; + fieldnameiterobject *it; + + PyObject *first_obj = NULL; + PyObject *result = NULL; + + it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); + if (it == NULL) + return NULL; + + /* take ownership, give the object to the iterator. 
this is + just to keep the field_name alive */ + Py_INCREF(self); + it->str = self; + + if (!field_name_split(STRINGLIB_STR(self), + STRINGLIB_LEN(self), + &first, &first_idx, &it->it_field)) + goto error; + + /* first becomes an integer, if possible, else a string */ + if (first_idx != -1) + first_obj = PyInt_FromSsize_t(first_idx); + else + /* convert "first" into a string object */ + first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr); + if (first_obj == NULL) + goto error; + + /* return a tuple of values */ + result = PyTuple_Pack(2, first_obj, it); + +error: + Py_XDECREF(it); + Py_XDECREF(first_obj); + return result; +} +/***************** end FieldName Iterator ************************/ + + static PyObject * unicode_getnewargs(PyUnicodeObject *v) { @@ -8036,6 +8377,8 @@ static PyMethodDef unicode_methods[] = { {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, + {"_formatter_field_name_split", (PyCFunction) unicode_formatter_field_name_split, METH_NOARGS}, + {"_formatter_parser", (PyCFunction) unicode_formatter_parser, METH_NOARGS}, #if 0 {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, #endif @@ -9141,342 +9484,6 @@ void _Py_ReleaseInternedUnicodeStrings(void) } -/********************* Formatter Iterator ************************/ - -/* this is used to implement string.Formatter.vparse(). 
it exists so - Formatter can share code with the built in unicode.format() - method */ - -typedef struct { - PyObject_HEAD - - /* we know this to be a unicode object, but since we just keep - it around to keep the object alive, having it as PyObject - is okay */ - PyObject *str; - - MarkupIterator it_markup; -} formatteriterobject; - -static void -formatteriter_dealloc(formatteriterobject *it) -{ - Py_XDECREF(it->str); - PyObject_FREE(it); -} - -/* returns a tuple: - (is_markup, literal, field_name, format_spec, conversion) - if is_markup == True: - literal is None - field_name is the string before the ':' - format_spec is the string after the ':' - conversion is either None, or the string after the '!' - if is_markup == False: - literal is the literal string - field_name is None - format_spec is None - conversion is None -*/ -static PyObject * -formatteriter_next(formatteriterobject *it) -{ - SubString literal; - SubString field_name; - SubString format_spec; - Py_UNICODE conversion; - int is_markup; - int format_spec_needs_expanding; - int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal, - &field_name, &format_spec, &conversion, - &format_spec_needs_expanding); - - /* all of the SubString objects point into it->str, so no - memory management needs to be done on them */ - assert(0 <= result && result <= 2); - if (result == 0) { - /* error has already been set */ - return NULL; - } else if (result == 1) { - /* end of iterator */ - return NULL; - } else { - PyObject *is_markup_bool = NULL; - PyObject *literal_str = NULL; - PyObject *field_name_str = NULL; - PyObject *format_spec_str = NULL; - PyObject *conversion_str = NULL; - PyObject *tuple = NULL; - - is_markup_bool = PyBool_FromLong(is_markup); - if (!is_markup_bool) - return NULL; - - if (is_markup) { - /* field_name, format_spec, and conversion are - returned */ - literal_str = Py_None; - Py_INCREF(literal_str); - - field_name_str = SubString_new_object(&field_name); - if (field_name_str == 
NULL) - goto error; - - format_spec_str = SubString_new_object(&format_spec); - if (format_spec_str == NULL) - goto error; - - /* if the conversion is not specified, return - a None, otherwise create a one length - string with the conversion characater */ - if (conversion == '\0') { - conversion_str = Py_None; - Py_INCREF(conversion_str); - } else - conversion_str = PyUnicode_FromUnicode(&conversion, - 1); - if (conversion_str == NULL) - goto error; - } else { - /* only literal is returned */ - literal_str = SubString_new_object(&literal); - if (literal_str == NULL) - goto error; - - field_name_str = Py_None; - format_spec_str = Py_None; - conversion_str = Py_None; - - Py_INCREF(field_name_str); - Py_INCREF(format_spec_str); - Py_INCREF(conversion_str); - } - tuple = PyTuple_Pack(5, is_markup_bool, literal_str, - field_name_str, format_spec_str, - conversion_str); - error: - Py_XDECREF(is_markup_bool); - Py_XDECREF(literal_str); - Py_XDECREF(field_name_str); - Py_XDECREF(format_spec_str); - Py_XDECREF(conversion_str); - return tuple; - } -} - -static PyMethodDef formatteriter_methods[] = { - {NULL, NULL} /* sentinel */ -}; - -PyTypeObject PyFormatterIter_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "formatteriterator", /* tp_name */ - sizeof(formatteriterobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)formatteriter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)formatteriter_next, /* tp_iternext */ - formatteriter_methods, 
/* tp_methods */ - 0, -}; - -PyObject * -_PyUnicode_FormatterIterator(PyObject *str) -{ - formatteriterobject *it; - - assert(PyUnicode_Check(str)); - it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); - if (it == NULL) - return NULL; - - /* take ownership, give the object to the iterator */ - Py_INCREF(str); - it->str = str; - - /* initialize the contained MarkupIterator */ - MarkupIterator_init(&it->it_markup, - PyUnicode_AS_UNICODE(str), - PyUnicode_GET_SIZE(str)); - - return (PyObject *)it; -} - -/********************* FieldName Iterator ************************/ - -/* this is used to implement string.Formatter.vparse(). it parses - the field name into attribute and item values. */ - -typedef struct { - PyObject_HEAD - - /* we know this to be a unicode object, but since we just keep - it around to keep the object alive, having it as PyObject - is okay */ - PyObject *str; - - FieldNameIterator it_field; -} fieldnameiterobject; - -static void -fieldnameiter_dealloc(fieldnameiterobject *it) -{ - Py_XDECREF(it->str); - PyObject_FREE(it); -} - -/* returns a tuple: - (is_attr, value) - is_attr is true if we used attribute syntax (e.g., '.foo') - false if we used index syntax (e.g., '[foo]') - value is an integer or string -*/ -static PyObject * -fieldnameiter_next(fieldnameiterobject *it) -{ - int result; - int is_attr; - Py_ssize_t idx; - SubString name; - - result = FieldNameIterator_next(&it->it_field, &is_attr, - &idx, &name); - if (result == 0 || result == 1) { - /* if 0, error has already been set, if 1, iterator is empty */ - return NULL; - } else { - PyObject* result = NULL; - PyObject* is_attr_obj = NULL; - PyObject* obj = NULL; - - is_attr_obj = PyBool_FromLong(is_attr); - if (is_attr_obj == NULL) - goto error; - - /* either an integer or a string */ - if (idx != -1) - obj = PyInt_FromSsize_t(idx); - else - obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr); - if (obj == NULL) - goto error; - - /* return a tuple of values */ - result = 
PyTuple_Pack(2, is_attr_obj, obj); - if (result == NULL) - goto error; - - return result; - - error: - Py_XDECREF(result); - Py_XDECREF(is_attr_obj); - Py_XDECREF(obj); - return NULL; - } - return NULL; -} - -static PyMethodDef fieldnameiter_methods[] = { - {NULL, NULL} /* sentinel */ -}; - -static PyTypeObject PyFieldNameIter_Type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "fieldnameiterator", /* tp_name */ - sizeof(fieldnameiterobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)fieldnameiter_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - PyObject_SelfIter, /* tp_iter */ - (iternextfunc)fieldnameiter_next, /* tp_iternext */ - fieldnameiter_methods, /* tp_methods */ - 0}; - -PyObject * -_PyUnicode_FormatterFieldNameSplit(PyObject *field_name) -{ - SubString first; - Py_ssize_t first_idx; - fieldnameiterobject *it; - - PyObject *first_obj = NULL; - PyObject *result = NULL; - - assert(PyUnicode_Check(field_name)); - it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); - if (it == NULL) - return NULL; - - /* take ownership, give the object to the iterator. 
this is - just to keep the field_name alive */ - Py_INCREF(field_name); - it->str = field_name; - - if (!field_name_split(STRINGLIB_STR(field_name), - STRINGLIB_LEN(field_name), - &first, &first_idx, &it->it_field)) - goto error; - - /* first becomes an integer, if possible, else a string */ - if (first_idx != -1) - first_obj = PyInt_FromSsize_t(first_idx); - else - /* convert "first" into a string object */ - first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr); - if (first_obj == NULL) - goto error; - - /* return a tuple of values */ - result = PyTuple_Pack(2, first_obj, it); - -error: - Py_XDECREF(it); - Py_XDECREF(first_obj); - return result; -} - /********************* Unicode Iterator **************************/ typedef struct { |