diff options
author | Eric Smith <eric@trueblade.com> | 2007-08-26 22:27:13 (GMT) |
---|---|---|
committer | Eric Smith <eric@trueblade.com> | 2007-08-26 22:27:13 (GMT) |
commit | 7ade6485abde95c5cc9676ad3e476ba3aca98037 (patch) | |
tree | fa5710899c3e376f89eb6a6460e06f3feee62d58 /Objects | |
parent | 2bf4d5ba2881725bb7695bc0573bab0e2ca4fec5 (diff) | |
download | cpython-7ade6485abde95c5cc9676ad3e476ba3aca98037.zip cpython-7ade6485abde95c5cc9676ad3e476ba3aca98037.tar.gz cpython-7ade6485abde95c5cc9676ad3e476ba3aca98037.tar.bz2 |
PEP 3101: Completed string.Formatter class. Reimplemented field_name to object transformation.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/stringlib/string_format.h | 425 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 165 |
2 files changed, 417 insertions, 173 deletions
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h index b81a7e3..2799141 100644 --- a/Objects/stringlib/string_format.h +++ b/Objects/stringlib/string_format.h @@ -72,23 +72,6 @@ SetError(const char *s) return PyErr_Format(PyExc_ValueError, "%s in format string", s); } -/* - check_input returns True if we still have characters - left in the input string. - - XXX: make this function go away when better error handling is - implemented. -*/ -Py_LOCAL_INLINE(int) -check_input(SubString *input) -{ - if (input->ptr < input->end) - return 1; - PyErr_SetString(PyExc_ValueError, - "unterminated replacement field"); - return 0; -} - /************************************************************************/ /*********** Output string management functions ****************/ /************************************************************************/ @@ -161,46 +144,22 @@ output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count) /*********** Format string parsing -- integers and identifiers *********/ /************************************************************************/ -/* - end_identifier returns true if a character marks - the end of an identifier string. - - Although the PEP specifies that identifiers are - numbers or valid Python identifiers, we just let - getattr/getitem handle that, so the implementation - is more flexible than the PEP would indicate. -*/ -Py_LOCAL_INLINE(int) -end_identifier(STRINGLIB_CHAR c) +static Py_ssize_t +get_integer(const SubString *str) { - switch (c) { - case '.': case '[': case ']': - return 1; - default: - return 0; - } -} + Py_ssize_t accumulator = 0; + Py_ssize_t digitval; + Py_ssize_t oldaccumulator; + STRINGLIB_CHAR *p; -/* - get_integer consumes 0 or more decimal digit characters from an - input string, updates *result with the corresponding positive - integer, and returns the number of digits consumed. + /* empty string is an error */ + if (str->ptr >= str->end) + return -1; - returns -1 on error. -*/ -static int -get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end, - Py_ssize_t *result) -{ - Py_ssize_t accumulator, digitval, oldaccumulator; - int numdigits; - accumulator = numdigits = 0; - for (;;(*ptr)++, numdigits++) { - if (*ptr >= end) - break; - digitval = STRINGLIB_TODECIMAL(**ptr); + for (p = str->ptr; p < str->end; p++) { + digitval = STRINGLIB_TODECIMAL(*p); if (digitval < 0) - break; + return -1; /* This trick was copied from old Unicode format code. It's cute, but would really suck on an old machine with a slow divide @@ -216,70 +175,215 @@ get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end, } accumulator += digitval; } - *result = accumulator; - return numdigits; + return accumulator; } -/* - get_identifier is a bit of a misnomer. It returns a value for use - with getattr or getindex. This value will a string/unicode - object. The input cannot be zero length. Continues until end of - input, or end_identifier() returns true. -*/ +/************************************************************************/ +/******** Functions to get field objects and specification strings ******/ +/************************************************************************/ + +/* do the equivalent of obj.name */ static PyObject * -get_identifier(SubString *input) +getattr(PyObject *obj, SubString *name) { - STRINGLIB_CHAR *start; - - for (start = input->ptr; - input->ptr < input->end && !end_identifier(*input->ptr); - input->ptr++) - ; + PyObject *newobj; + PyObject *str = STRINGLIB_NEW(name->ptr, name->end - name->ptr); + if (str == NULL) + return NULL; + newobj = PyObject_GetAttr(obj, str); + Py_DECREF(str); + return newobj; +} - return STRINGLIB_NEW(start, input->ptr - start); +/* do the equivalent of obj[idx], where obj is a sequence */ +static PyObject * +getitem_sequence(PyObject *obj, Py_ssize_t idx) +{ + return PySequence_GetItem(obj, idx); +} - /* - We might want to add code here to check for invalid Python - identifiers. All identifiers are eventually passed to getattr - or getitem, so there is a check when used. However, we might - want to remove (or not) the ability to have strings like - "a/b" or " ab" or "-1" (which is not parsed as a number). - For now, this is left as an exercise for the first disgruntled - user... +/* do the equivalent of obj[idx], where obj is not a sequence */ +static PyObject * +getitem_idx(PyObject *obj, Py_ssize_t idx) +{ + PyObject *newobj; + PyObject *idx_obj = PyInt_FromSsize_t(idx); + if (idx_obj == NULL) + return NULL; + newobj = PyObject_GetItem(obj, idx_obj); + Py_DECREF(idx_obj); + return newobj; +} - if (XXX -- need check function) { - Py_DECREF(result); - PyErr_SetString(PyExc_ValueError, - "Invalid embedded Python identifier"); +/* do the equivalent of obj[name] */ +static PyObject * +getitem_str(PyObject *obj, SubString *name) +{ + PyObject *newobj; + PyObject *str = STRINGLIB_NEW(name->ptr, name->end - name->ptr); + if (str == NULL) return NULL; + newobj = PyObject_GetItem(obj, str); + Py_DECREF(str); + return newobj; +} + +typedef struct { + /* the entire string we're parsing. we assume that someone else + is managing its lifetime, and that it will exist for the + lifetime of the iterator. can be empty */ + SubString str; + + /* pointer to where we are inside field_name */ + STRINGLIB_CHAR *ptr; +} FieldNameIterator; + + +static int +FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr, + Py_ssize_t len) +{ + SubString_init(&self->str, ptr, len); + self->ptr = self->str.ptr; + return 1; +} + +static int +_FieldNameIterator_attr(FieldNameIterator *self, SubString *name) +{ + STRINGLIB_CHAR c; + + name->ptr = self->ptr; + + /* return everything until '.' or '[' */ + while (self->ptr < self->str.end) { + switch (c = *self->ptr++) { + case '[': + case '.': + /* backup so that we this character will be seen next time */ + self->ptr--; + break; + default: + continue; + } + break; } - */ + /* end of string is okay */ + name->end = self->ptr; + return 1; } -/************************************************************************/ -/******** Functions to get field objects and specification strings ******/ -/************************************************************************/ +static int +_FieldNameIterator_item(FieldNameIterator *self, SubString *name) +{ + STRINGLIB_CHAR c; -/* get_field_and_spec is the main function in this section. It parses - the format string well enough to return a field object to render along - with a field specification string. -*/ + name->ptr = self->ptr; -/* - look up key in our keyword arguments + /* return everything until ']' */ + while (self->ptr < self->str.end) { + switch (c = *self->ptr++) { + case ']': + break; + default: + continue; + } + break; + } + /* end of string is okay */ + /* don't include the ']' */ + name->end = self->ptr-1; + return 1; +} + +/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */ +static int +FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, + Py_ssize_t *name_idx, SubString *name) +{ + /* check at end of input */ + if (self->ptr >= self->str.end) + return 1; + + switch (*self->ptr++) { + case '.': + *is_attribute = 1; + if (_FieldNameIterator_attr(self, name) == 0) { + return 0; + } + *name_idx = -1; + break; + case '[': + *is_attribute = 0; + if (_FieldNameIterator_item(self, name) == 0) { + return 0; + } + *name_idx = get_integer(name); + break; + default: + /* interal error, can't get here */ + assert(0); + return 0; + } + + /* empty string is an error */ + if (name->ptr == name->end) { + PyErr_SetString(PyExc_ValueError, "Empty attribute in format string"); + return 0; + } + + return 2; +} + + +/* input: field_name + output: 'first' points to the part before the first '[' or '.' + 'first_idx' is -1 if 'first' is not an integer, otherwise + it's the value of first converted to an integer + 'rest' is an iterator to return the rest */ -static PyObject * -key_lookup(PyObject *kwargs, PyObject *key) +static int +field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, + Py_ssize_t *first_idx, FieldNameIterator *rest) { - PyObject *result; + STRINGLIB_CHAR c; + STRINGLIB_CHAR *p = ptr; + STRINGLIB_CHAR *end = ptr + len; + + /* find the part up until the first '.' or '[' */ + while (p < end) { + switch (c = *p++) { + case '[': + case '.': + /* backup so that we this character is available to the + "rest" iterator */ + p--; + break; + default: + continue; + } + break; + } + + /* set up the return values */ + SubString_init(first, ptr, p - ptr); + FieldNameIterator_init(rest, p, end - p); + + /* see if "first" is an integer, in which case it's used as an index */ + *first_idx = get_integer(first); - if (kwargs && (result = PyDict_GetItem(kwargs, key)) != NULL) { - Py_INCREF(result); - return result; + /* zero length string is an error */ + if (first->ptr >= first->end) { + PyErr_SetString(PyExc_ValueError, "empty field name"); + goto error; } - return NULL; + + return 1; +error: + return 0; } + /* get_field_object returns the object inside {}, before the format_spec. It handles getindex and getattr lookups and consumes @@ -288,80 +392,71 @@ key_lookup(PyObject *kwargs, PyObject *key) static PyObject * get_field_object(SubString *input, PyObject *args, PyObject *kwargs) { - PyObject *myobj, *subobj, *newobj; - STRINGLIB_CHAR c; + PyObject *obj = NULL; + int ok; + int is_attribute; + SubString name; + SubString first; Py_ssize_t index; - int isindex, isnumeric, isargument; - - index = isnumeric = 0; /* Just to shut up the compiler warnings */ + FieldNameIterator rest; - myobj = args; - Py_INCREF(myobj); + if (!field_name_split(input->ptr, input->end - input->ptr, &first, + &index, &rest)) { + goto error; + } - for (isindex=1, isargument=1;;) { - if (!check_input(input)) - break; - if (!isindex) { - if ((subobj = get_identifier(input)) == NULL) - break; - newobj = PyObject_GetAttr(myobj, subobj); - Py_DECREF(subobj); - } else { - isnumeric = (STRINGLIB_ISDECIMAL(*input->ptr)); - if (isnumeric) - /* XXX: add error checking */ - get_integer(&input->ptr, input->end, &index); - - if (isnumeric && PySequence_Check(myobj)) - newobj = PySequence_GetItem(myobj, index); - else { - /* XXX -- do we need PyLong_FromLongLong? - Using ssizet, not int... */ - subobj = isnumeric ? - PyInt_FromLong(index) : - get_identifier(input); - if (subobj == NULL) - break; - if (isargument) { - newobj = key_lookup(kwargs, subobj); - } else { - newobj = PyObject_GetItem(myobj, subobj); - } - Py_DECREF(subobj); - } + if (index == -1) { + /* look up in kwargs */ + PyObject *key = STRINGLIB_NEW(first.ptr, first.end - first.ptr); + if (key == NULL) + goto error; + if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) { + PyErr_SetString(PyExc_ValueError, "Keyword argument not found " + "in format string"); + Py_DECREF(key); + goto error; } - Py_DECREF(myobj); - myobj = newobj; - if (myobj == NULL) - break; - if (!isargument && isindex) - if ((!check_input(input)) || (*(input->ptr++) != ']')) { - SetError("Expected ]"); - break; - } - - /* if at the end of input, return with myobj */ - if (input->ptr >= input->end) - return myobj; - - c = *input->ptr; - input->ptr++; - isargument = 0; - isindex = (c == '['); - if (!isindex && (c != '.')) { - SetError("Expected ., [, :, !, or }"); - break; + } else { + /* look up in args */ + obj = PySequence_GetItem(args, index); + if (obj == NULL) { + /* translate IndexError to a ValueError */ + PyErr_SetString(PyExc_ValueError, "Not enough positional arguments " + "in format string"); + goto error; } } - if ((myobj == NULL) && isargument) { - /* XXX: include more useful error information, like which - * keyword not found or which index missing */ - PyErr_Clear(); - return SetError(isnumeric - ? "Not enough positional arguments" - : "Keyword argument not found"); + + /* iterate over the rest of the field_name */ + while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index, + &name)) == 2) { + PyObject *tmp; + + if (is_attribute) + /* getattr lookup "." */ + tmp = getattr(obj, &name); + else + /* getitem lookup "[]" */ + if (index == -1) + tmp = getitem_str(obj, &name); + else + if (PySequence_Check(obj)) + tmp = getitem_sequence(obj, index); + else + /* not a sequence */ + tmp = getitem_idx(obj, index); + if (tmp == NULL) + goto error; + + /* assign to obj */ + Py_DECREF(obj); + obj = tmp; } - Py_XDECREF(myobj); + /* end of iterator, this is the non-error case */ + if (ok == 1) + return obj; +error: + Py_XDECREF(obj); return NULL; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index d10d26f..1421bb2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9161,9 +9161,8 @@ typedef struct { static void formatteriter_dealloc(formatteriterobject *it) { - _PyObject_GC_UNTRACK(it); - Py_XDECREF(it->str); - PyObject_GC_Del(it); + Py_XDECREF(it->str); + PyObject_FREE(it); } /* returns a tuple: @@ -9313,7 +9312,7 @@ _unicodeformatter_iterator(PyObject *str) { formatteriterobject *it; - it = PyObject_GC_New(formatteriterobject, &PyFormatterIter_Type); + it = PyObject_New(formatteriterobject, &PyFormatterIter_Type); if (it == NULL) return NULL; @@ -9326,17 +9325,167 @@ _unicodeformatter_iterator(PyObject *str) PyUnicode_AS_UNICODE(str), PyUnicode_GET_SIZE(str)); - _PyObject_GC_TRACK(it); return (PyObject *)it; } -PyObject * -_unicodeformatter_lookup(PyObject *field_name, PyObject *args, - PyObject *kwargs) +/********************* FieldName Iterator ************************/ + +/* this is used to implement string.Formatter.vparse(). it parses + the field name into attribute and item values. */ + +typedef struct { + PyObject_HEAD + + /* we know this to be a unicode object, but since we just keep + it around to keep the object alive, having it as PyObject + is okay */ + PyObject *str; + + FieldNameIterator it_field; +} fieldnameiterobject; + +static void +fieldnameiter_dealloc(fieldnameiterobject *it) { + Py_XDECREF(it->str); + PyObject_FREE(it); +} + +/* returns a tuple: + (is_attr, value) + is_attr is true if we used attribute syntax (e.g., '.foo') + false if we used index syntax (e.g., '[foo]') + value is an integer or string +*/ +static PyObject * +fieldnameiter_next(fieldnameiterobject *it) +{ + int result; + int is_attr; + Py_ssize_t idx; + SubString name; + + result = FieldNameIterator_next(&it->it_field, &is_attr, + &idx, &name); + if (result == 0 || result == 1) { + /* if 0, error has already been set, if 1, iterator is empty */ + return NULL; + } else { + PyObject* result = NULL; + PyObject* is_attr_obj = NULL; + PyObject* obj = NULL; + + is_attr_obj = PyBool_FromLong(is_attr); + if (is_attr_obj == NULL) + goto error; + + /* either an integer or a string */ + if (idx != -1) + obj = PyInt_FromSsize_t(idx); + else + obj = STRINGLIB_NEW(name.ptr, name.end - name.ptr); + if (obj == NULL) + goto error; + + /* return a tuple of values */ + result = PyTuple_Pack(2, is_attr_obj, obj); + if (result == NULL) + goto error; + + return result; + + error: + Py_XDECREF(result); + Py_XDECREF(is_attr_obj); + Py_XDECREF(obj); + return NULL; + } return NULL; } +static PyMethodDef fieldnameiter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +static PyTypeObject PyFieldNameIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "fieldnameiterator", /* tp_name */ + sizeof(fieldnameiterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)fieldnameiter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)fieldnameiter_next, /* tp_iternext */ + fieldnameiter_methods, /* tp_methods */ + 0}; + +PyObject * +_unicodeformatter_field_name_split(PyObject *field_name) +{ + SubString first; + Py_ssize_t first_idx; + fieldnameiterobject *it; + + PyObject *first_obj = NULL; + PyObject *it_obj = NULL; + PyObject *result; + + it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type); + if (it == NULL) + goto error; + it->str = NULL; + it_obj = (PyObject *)it; + + if (!field_name_split(STRINGLIB_STR(field_name), + STRINGLIB_LEN(field_name), + &first, &first_idx, &it->it_field)) + goto error; + + /* first becomes an integer, if possible, else a string */ + if (first_idx != -1) + first_obj = PyInt_FromSsize_t(first_idx); + else + /* convert "first" into a string object */ + first_obj = STRINGLIB_NEW(first.ptr, first.end - first.ptr); + if (first_obj == NULL) + goto error; + + /* take ownership, give the object to the iterator. this is + just to keep the field_name alive */ + Py_INCREF(field_name); + it->str = field_name; + + /* return a tuple of values */ + result = PyTuple_Pack(2, first_obj, it_obj); + if (result == NULL) + goto error; + + return result; +error: + Py_XDECREF(it_obj); + Py_XDECREF(first_obj); + return NULL; +} /********************* Unicode Iterator **************************/ |