summary refs log tree commit diff stats
path: root/Objects/unicodeobject.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- Objects/unicodeobject.c 681
1 files changed, 344 insertions, 337 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 84f55b5..c46da45 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -598,7 +598,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
*/
if ((*f == 'l' || *f == 'z') &&
(f[1] == 'd' || f[1] == 'u'))
- ++f;
+ ++f;
switch (*f) {
case 'c':
@@ -7981,6 +7981,347 @@ unicode__format__(PyObject *self, PyObject *args)
}
+/********************* Formatter Iterator ************************/
+
+/* this is used to implement string.Formatter.vparse(). it exists so
+ Formatter can share code with the built in unicode.format()
+ method */
+
+typedef struct { /* iterator object created by unicode_formatter_parser() */
+ PyObject_HEAD
+
+ PyUnicodeObject *str; /* string being parsed; a reference is held until dealloc */
+
+ MarkupIterator it_markup; /* parser state, initialized over str's buffer by unicode_formatter_parser() */
+} formatteriterobject;
+
+static void /* installed as tp_dealloc of PyFormatterIter_Type */
+formatteriter_dealloc(formatteriterobject *it)
+{
+ Py_XDECREF(it->str); /* drop the reference taken when the iterator was created */
+ PyObject_FREE(it); /* then release the iterator object's own memory */
+}
+
+/* tp_iternext for formatteriterobject: advance the wrapped
+   MarkupIterator one step and describe what it found as a 5-tuple
+       (is_markup, literal, field_name, format_spec, conversion)
+   when is_markup is True:
+       literal      is None
+       field_name   is the string before the ':'
+       format_spec  is the string after the ':'
+       conversion   is None, or the string after the '!'
+   when is_markup is False:
+       literal      is the literal string
+       field_name, format_spec and conversion are all None
+   returns a new reference to the tuple, or NULL when
+   MarkupIterator_next() reports an error (0, exception already set)
+   or the end of the input (1) */
+static PyObject *
+formatteriter_next(formatteriterobject *it)
+{
+    SubString literal;
+    SubString field_name;
+    SubString format_spec;
+    Py_UNICODE conversion;
+    int is_markup;
+    int format_spec_needs_expanding;
+    PyObject *markup_flag = NULL;
+    PyObject *literal_obj = NULL;
+    PyObject *name_obj = NULL;
+    PyObject *spec_obj = NULL;
+    PyObject *conv_obj = NULL;
+    PyObject *tuple = NULL;
+    int status = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
+                                     &field_name, &format_spec, &conversion,
+                                     &format_spec_needs_expanding);
+
+    /* the SubStrings filled in above point into it->str, so they
+       need no memory management of their own */
+    assert(0 <= status && status <= 2);
+    if (status == 0 || status == 1) {
+        /* 0: error has already been set; 1: end of iterator */
+        return NULL;
+    }
+
+    markup_flag = PyBool_FromLong(is_markup);
+    if (markup_flag == NULL)
+        return NULL;
+
+    if (!is_markup) {
+        /* plain text: only the literal slot carries a string */
+        literal_obj = SubString_new_object(&literal);
+        if (literal_obj == NULL)
+            goto done;
+
+        Py_INCREF(Py_None);
+        name_obj = Py_None;
+        Py_INCREF(Py_None);
+        spec_obj = Py_None;
+        Py_INCREF(Py_None);
+        conv_obj = Py_None;
+    }
+    else {
+        /* replacement field: literal is None, the rest is filled in */
+        Py_INCREF(Py_None);
+        literal_obj = Py_None;
+
+        name_obj = SubString_new_object(&field_name);
+        if (name_obj == NULL)
+            goto done;
+
+        spec_obj = SubString_new_object(&format_spec);
+        if (spec_obj == NULL)
+            goto done;
+
+        /* a conversion of '\0' means none was given and maps to
+           None; anything else becomes a one-character string */
+        if (conversion != '\0') {
+            conv_obj = PyUnicode_FromUnicode(&conversion, 1);
+        }
+        else {
+            Py_INCREF(Py_None);
+            conv_obj = Py_None;
+        }
+        if (conv_obj == NULL)
+            goto done;
+    }
+
+    tuple = PyTuple_Pack(5, markup_flag, literal_obj,
+                         name_obj, spec_obj,
+                         conv_obj);
+
+done:
+    /* reached on success and on failure alike: give up the local
+       references and hand back the tuple (still NULL on failure) */
+    Py_XDECREF(markup_flag);
+    Py_XDECREF(literal_obj);
+    Py_XDECREF(name_obj);
+    Py_XDECREF(spec_obj);
+    Py_XDECREF(conv_obj);
+    return tuple;
+}
+
+static PyMethodDef formatteriter_methods[] = { /* method table for PyFormatterIter_Type; defines no methods */
+ {NULL, NULL} /* sentinel */
+};
+
+PyTypeObject PyFormatterIter_Type = { /* type of the iterator created by unicode_formatter_parser() */
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "formatteriterator", /* tp_name */
+ sizeof(formatteriterobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)formatteriter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)formatteriter_next, /* tp_iternext */
+ formatteriter_methods, /* tp_methods */
+ 0, /* presumably tp_members, the slot after tp_methods -- confirm
+       against the PyTypeObject layout in use; slots not listed
+       here are zero-initialized */
+}; /* NOTE(review): not declared static, unlike PyFieldNameIter_Type --
+      confirm whether anything outside this file refers to it */
+
+/* unicode_formatter_parser is used to implement
+   string.Formatter.vformat.  it wraps the string in a new
+   formatteriterobject whose tp_iternext (formatteriter_next) yields
+   tuples describing the parsed elements.  it is a thin wrapper around
+   stringlib/string_format.h's MarkupIterator.  returns a new
+   reference, or NULL if the iterator object cannot be allocated */
+static PyObject *
+unicode_formatter_parser(PyUnicodeObject *self)
+{
+    formatteriterobject *iter = PyObject_New(formatteriterobject,
+                                             &PyFormatterIter_Type);
+
+    if (iter != NULL) {
+        /* the iterator keeps its own reference to the string; it is
+           released again in formatteriter_dealloc */
+        Py_INCREF(self);
+        iter->str = self;
+
+        /* point the embedded MarkupIterator at the string's buffer */
+        MarkupIterator_init(&iter->it_markup,
+                            PyUnicode_AS_UNICODE(self),
+                            PyUnicode_GET_SIZE(self));
+    }
+
+    return (PyObject *)iter;
+}
+/***************** end Formatter Iterator ************************/
+/********************* FieldName Iterator ************************/
+
+/* this is used to implement string.Formatter.vparse(). it parses
+ the field name into attribute and item values. */
+
+typedef struct { /* iterator object created by unicode_formatter_field_name_split() */
+ PyObject_HEAD
+
+ PyUnicodeObject *str; /* the field name string; referenced only to keep it alive */
+
+ FieldNameIterator it_field; /* iteration state, filled in by field_name_split() */
+} fieldnameiterobject;
+
+static void /* installed as tp_dealloc of PyFieldNameIter_Type */
+fieldnameiter_dealloc(fieldnameiterobject *it)
+{
+ Py_XDECREF(it->str); /* drop the reference taken when the iterator was created */
+ PyObject_FREE(it); /* then release the iterator object's own memory */
+}
+
+/* tp_iternext for fieldnameiterobject: fetch the next component of
+   the field name from the wrapped FieldNameIterator.
+   returns a new reference to a tuple:
+       (is_attr, value)
+   is_attr is true if we used attribute syntax (e.g., '.foo')
+           false if we used index syntax (e.g., '[foo]')
+   value   is an integer or string
+   returns NULL when FieldNameIterator_next() reports an error (0,
+   exception already set) or that the iterator is empty (1), and
+   also when building any of the result objects fails */
+static PyObject *
+fieldnameiter_next(fieldnameiterobject *it)
+{
+    int status;
+    int is_attr;
+    Py_ssize_t idx;
+    SubString name;
+    PyObject *tuple = NULL;
+    PyObject *is_attr_obj = NULL;
+    PyObject *obj = NULL;
+
+    status = FieldNameIterator_next(&it->it_field, &is_attr,
+                                    &idx, &name);
+    if (status == 0 || status == 1) {
+        /* if 0, error has already been set, if 1, iterator is empty */
+        return NULL;
+    }
+
+    is_attr_obj = PyBool_FromLong(is_attr);
+    if (is_attr_obj == NULL)
+        goto done;
+
+    /* either an integer or a string; idx == -1 selects the string */
+    if (idx != -1)
+        obj = PyInt_FromSsize_t(idx);
+    else
+        obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr);
+    if (obj == NULL)
+        goto done;
+
+    /* PyTuple_Pack takes its own references to the items, so ours
+       must be released below on success as well as on failure;
+       returning early on success would leak both objects */
+    tuple = PyTuple_Pack(2, is_attr_obj, obj);
+
+done:
+    Py_XDECREF(is_attr_obj);
+    Py_XDECREF(obj);
+    return tuple;
+}
+
+static PyMethodDef fieldnameiter_methods[] = { /* method table for PyFieldNameIter_Type; defines no methods */
+ {NULL, NULL} /* sentinel */
+};
+
+static PyTypeObject PyFieldNameIter_Type = { /* type of the iterator created by unicode_formatter_field_name_split() */
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "fieldnameiterator", /* tp_name */
+ sizeof(fieldnameiterobject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)fieldnameiter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ PyObject_GenericGetAttr, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)fieldnameiter_next, /* tp_iternext */
+ fieldnameiter_methods, /* tp_methods */
+ 0}; /* presumably tp_members, the slot after tp_methods -- confirm
+        against the PyTypeObject layout in use; slots not listed
+        here are zero-initialized */
+
+/* unicode_formatter_field_name_split is used to implement
+   string.Formatter.vformat.  it takes a PEP 3101 "field name" and
+   returns a tuple of (first, rest):
+       first  the part before the first '.' or '[', as an integer
+              when field_name_split() reports an index, else as a
+              string
+       rest   a fieldnameiterobject iterating over the remainder of
+              the field name
+   it is a wrapper around stringlib/string_format.h's
+   field_name_split.  returns a new reference, or NULL on failure */
+static PyObject *
+unicode_formatter_field_name_split(PyUnicodeObject *self)
+{
+    SubString head;
+    Py_ssize_t head_idx;
+    PyObject *head_obj = NULL;
+    PyObject *pair = NULL;
+    fieldnameiterobject *rest = PyObject_New(fieldnameiterobject,
+                                             &PyFieldNameIter_Type);
+
+    if (rest == NULL)
+        return NULL;
+
+    /* the iterator holds a reference to the field name purely to
+       keep it alive; fieldnameiter_dealloc releases it */
+    Py_INCREF(self);
+    rest->str = self;
+
+    if (field_name_split(STRINGLIB_STR(self),
+                         STRINGLIB_LEN(self),
+                         &head, &head_idx, &rest->it_field)) {
+        /* an index of -1 means "first" is not an integer, so it is
+           converted into a string object instead */
+        if (head_idx == -1)
+            head_obj = STRINGLIB_NEW(head.ptr, head.end - head.ptr);
+        else
+            head_obj = PyInt_FromSsize_t(head_idx);
+
+        /* the tuple takes its own references to both items */
+        if (head_obj != NULL)
+            pair = PyTuple_Pack(2, head_obj, rest);
+    }
+
+    /* shared exit: drop the local references whether or not the
+       tuple was built; pair is still NULL on any failure */
+    Py_XDECREF(rest);
+    Py_XDECREF(head_obj);
+    return pair;
+}
+/***************** end FieldName Iterator ************************/
+
+
static PyObject *
unicode_getnewargs(PyUnicodeObject *v)
{
@@ -8036,6 +8377,8 @@ static PyMethodDef unicode_methods[] = {
{"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
{"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
{"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
+ {"_formatter_field_name_split", (PyCFunction) unicode_formatter_field_name_split, METH_NOARGS},
+ {"_formatter_parser", (PyCFunction) unicode_formatter_parser, METH_NOARGS},
#if 0
{"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__},
#endif
@@ -9141,342 +9484,6 @@ void _Py_ReleaseInternedUnicodeStrings(void)
}
-/********************* Formatter Iterator ************************/
-
-/* this is used to implement string.Formatter.vparse(). it exists so
- Formatter can share code with the built in unicode.format()
- method */
-
-typedef struct {
- PyObject_HEAD
-
- /* we know this to be a unicode object, but since we just keep
- it around to keep the object alive, having it as PyObject
- is okay */
- PyObject *str;
-
- MarkupIterator it_markup;
-} formatteriterobject;
-
-static void
-formatteriter_dealloc(formatteriterobject *it)
-{
- Py_XDECREF(it->str);
- PyObject_FREE(it);
-}
-
-/* returns a tuple:
- (is_markup, literal, field_name, format_spec, conversion)
- if is_markup == True:
- literal is None
- field_name is the string before the ':'
- format_spec is the string after the ':'
- conversion is either None, or the string after the '!'
- if is_markup == False:
- literal is the literal string
- field_name is None
- format_spec is None
- conversion is None
-*/
-static PyObject *
-formatteriter_next(formatteriterobject *it)
-{
- SubString literal;
- SubString field_name;
- SubString format_spec;
- Py_UNICODE conversion;
- int is_markup;
- int format_spec_needs_expanding;
- int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal,
- &field_name, &format_spec, &conversion,
- &format_spec_needs_expanding);
-
- /* all of the SubString objects point into it->str, so no
- memory management needs to be done on them */
- assert(0 <= result && result <= 2);
- if (result == 0) {
- /* error has already been set */
- return NULL;
- } else if (result == 1) {
- /* end of iterator */
- return NULL;
- } else {
- PyObject *is_markup_bool = NULL;
- PyObject *literal_str = NULL;
- PyObject *field_name_str = NULL;
- PyObject *format_spec_str = NULL;
- PyObject *conversion_str = NULL;
- PyObject *tuple = NULL;
-
- is_markup_bool = PyBool_FromLong(is_markup);
- if (!is_markup_bool)
- return NULL;
-
- if (is_markup) {
- /* field_name, format_spec, and conversion are
- returned */
- literal_str = Py_None;
- Py_INCREF(literal_str);
-
- field_name_str = SubString_new_object(&field_name);
- if (field_name_str == NULL)
- goto error;
-
- format_spec_str = SubString_new_object(&format_spec);
- if (format_spec_str == NULL)
- goto error;
-
- /* if the conversion is not specified, return
- a None, otherwise create a one length
- string with the conversion characater */
- if (conversion == '\0') {
- conversion_str = Py_None;
- Py_INCREF(conversion_str);
- } else
- conversion_str = PyUnicode_FromUnicode(&conversion,
- 1);
- if (conversion_str == NULL)
- goto error;
- } else {
- /* only literal is returned */
- literal_str = SubString_new_object(&literal);
- if (literal_str == NULL)
- goto error;
-
- field_name_str = Py_None;
- format_spec_str = Py_None;
- conversion_str = Py_None;
-
- Py_INCREF(field_name_str);
- Py_INCREF(format_spec_str);
- Py_INCREF(conversion_str);
- }
- tuple = PyTuple_Pack(5, is_markup_bool, literal_str,
- field_name_str, format_spec_str,
- conversion_str);
- error:
- Py_XDECREF(is_markup_bool);
- Py_XDECREF(literal_str);
- Py_XDECREF(field_name_str);
- Py_XDECREF(format_spec_str);
- Py_XDECREF(conversion_str);
- return tuple;
- }
-}
-
-static PyMethodDef formatteriter_methods[] = {
- {NULL, NULL} /* sentinel */
-};
-
-PyTypeObject PyFormatterIter_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "formatteriterator", /* tp_name */
- sizeof(formatteriterobject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)formatteriter_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)formatteriter_next, /* tp_iternext */
- formatteriter_methods, /* tp_methods */
- 0,
-};
-
-PyObject *
-_PyUnicode_FormatterIterator(PyObject *str)
-{
- formatteriterobject *it;
-
- assert(PyUnicode_Check(str));
- it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
- if (it == NULL)
- return NULL;
-
- /* take ownership, give the object to the iterator */
- Py_INCREF(str);
- it->str = str;
-
- /* initialize the contained MarkupIterator */
- MarkupIterator_init(&it->it_markup,
- PyUnicode_AS_UNICODE(str),
- PyUnicode_GET_SIZE(str));
-
- return (PyObject *)it;
-}
-
-/********************* FieldName Iterator ************************/
-
-/* this is used to implement string.Formatter.vparse(). it parses
- the field name into attribute and item values. */
-
-typedef struct {
- PyObject_HEAD
-
- /* we know this to be a unicode object, but since we just keep
- it around to keep the object alive, having it as PyObject
- is okay */
- PyObject *str;
-
- FieldNameIterator it_field;
-} fieldnameiterobject;
-
-static void
-fieldnameiter_dealloc(fieldnameiterobject *it)
-{
- Py_XDECREF(it->str);
- PyObject_FREE(it);
-}
-
-/* returns a tuple:
- (is_attr, value)
- is_attr is true if we used attribute syntax (e.g., '.foo')
- false if we used index syntax (e.g., '[foo]')
- value is an integer or string
-*/
-static PyObject *
-fieldnameiter_next(fieldnameiterobject *it)
-{
- int result;
- int is_attr;
- Py_ssize_t idx;
- SubString name;
-
- result = FieldNameIterator_next(&it->it_field, &is_attr,
- &idx, &name);
- if (result == 0 || result == 1) {
- /* if 0, error has already been set, if 1, iterator is empty */
- return NULL;
- } else {
- PyObject* result = NULL;
- PyObject* is_attr_obj = NULL;
- PyObject* obj = NULL;
-
- is_attr_obj = PyBool_FromLong(is_attr);
- if (is_attr_obj == NULL)
- goto error;
-
- /* either an integer or a string */
- if (idx != -1)
- obj = PyInt_FromSsize_t(idx);
- else
- obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr);
- if (obj == NULL)
- goto error;
-
- /* return a tuple of values */
- result = PyTuple_Pack(2, is_attr_obj, obj);
- if (result == NULL)
- goto error;
-
- return result;
-
- error:
- Py_XDECREF(result);
- Py_XDECREF(is_attr_obj);
- Py_XDECREF(obj);
- return NULL;
- }
- return NULL;
-}
-
-static PyMethodDef fieldnameiter_methods[] = {
- {NULL, NULL} /* sentinel */
-};
-
-static PyTypeObject PyFieldNameIter_Type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "fieldnameiterator", /* tp_name */
- sizeof(fieldnameiterobject), /* tp_basicsize */
- 0, /* tp_itemsize */
- /* methods */
- (destructor)fieldnameiter_dealloc, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- 0, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- PyObject_SelfIter, /* tp_iter */
- (iternextfunc)fieldnameiter_next, /* tp_iternext */
- fieldnameiter_methods, /* tp_methods */
- 0};
-
-PyObject *
-_PyUnicode_FormatterFieldNameSplit(PyObject *field_name)
-{
- SubString first;
- Py_ssize_t first_idx;
- fieldnameiterobject *it;
-
- PyObject *first_obj = NULL;
- PyObject *result = NULL;
-
- assert(PyUnicode_Check(field_name));
- it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
- if (it == NULL)
- return NULL;
-
- /* take ownership, give the object to the iterator. this is
- just to keep the field_name alive */
- Py_INCREF(field_name);
- it->str = field_name;
-
- if (!field_name_split(STRINGLIB_STR(field_name),
- STRINGLIB_LEN(field_name),
- &first, &first_idx, &it->it_field))
- goto error;
-
- /* first becomes an integer, if possible, else a string */
- if (first_idx != -1)
- first_obj = PyInt_FromSsize_t(first_idx);
- else
- /* convert "first" into a string object */
- first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr);
- if (first_obj == NULL)
- goto error;
-
- /* return a tuple of values */
- result = PyTuple_Pack(2, first_obj, it);
-
-error:
- Py_XDECREF(it);
- Py_XDECREF(first_obj);
- return result;
-}
-
/********************* Unicode Iterator **************************/
typedef struct {