diff options
-rw-r--r-- | Lib/test/test_unicode.py | 36 | ||||
-rw-r--r-- | Misc/NEWS | 4 | ||||
-rw-r--r-- | Objects/stringlib/string_format.h | 173 |
3 files changed, 166 insertions, 47 deletions
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index f16a78a..b40ee84 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -683,9 +683,9 @@ class UnicodeTest( self.assertRaises(ValueError, "{0!}".format, 0) self.assertRaises(ValueError, "{0!rs}".format, 0) self.assertRaises(ValueError, "{!}".format) - self.assertRaises(ValueError, "{:}".format) - self.assertRaises(ValueError, "{:s}".format) - self.assertRaises(ValueError, "{}".format) + self.assertRaises(IndexError, "{:}".format) + self.assertRaises(IndexError, "{:s}".format) + self.assertRaises(IndexError, "{}".format) # can't have a replacement on the field name portion self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4) @@ -704,6 +704,36 @@ class UnicodeTest( self.assertRaises(ValueError, format, '', '#') self.assertRaises(ValueError, format, '', '#20') + def test_format_auto_numbering(self): + class C: + def __init__(self, x=100): + self._x = x + def __format__(self, spec): + return spec + + self.assertEqual('{}'.format(10), '10') + self.assertEqual('{:5}'.format('s'), 's    ') + self.assertEqual('{!r}'.format('s'), "'s'") + self.assertEqual('{._x}'.format(C(10)), '10') + self.assertEqual('{[1]}'.format([1, 2]), '2') + self.assertEqual('{[a]}'.format({'a':4, 'b':2}), '4') + self.assertEqual('a{}b{}c'.format(0, 1), 'a0b1c') + + self.assertEqual('a{:{}}b'.format('x', '^10'), 'a    x     b') + self.assertEqual('a{:{}x}b'.format(20, '#'), 'a0x14b') + + # can't mix and match numbering and auto-numbering + self.assertRaises(ValueError, '{}{1}'.format, 1, 2) + self.assertRaises(ValueError, '{1}{}'.format, 1, 2) + self.assertRaises(ValueError, '{:{1}}'.format, 1, 2) + self.assertRaises(ValueError, '{0:{}}'.format, 1, 2) + + # can mix and match auto-numbering and named + self.assertEqual('{f}{}'.format(4, f='test'), 'test4') + self.assertEqual('{}{f}'.format(4, f='test'), '4test') + self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3') + 
self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g') + def test_formatting(self): string_tests.MixinStrUnicodeUserStringTest.test_formatting(self) # Testing Unicode formatting strings... @@ -12,6 +12,9 @@ What's New in Python 3.1 alpha 2? Core and Builtins ----------------- +- Issue #5237: Allow auto-numbered fields in str.format(). For + example: '{} {}'.format(1, 2) == '1 2'. + - Issue #5392: when a very low recursion limit was set, the interpreter would abort with a fatal error after the recursion limit was hit twice. @@ -33,6 +36,7 @@ What's New in Python 3.1 alpha 1 Core and Builtins ----------------- +======= - The io module has been reimplemented in C for speed. - Give dict views an informative __repr__. diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h index b828770..fabc7a1 100644 --- a/Objects/stringlib/string_format.h +++ b/Objects/stringlib/string_format.h @@ -31,10 +31,23 @@ typedef struct { } SubString; +typedef enum { + ANS_INIT, + ANS_AUTO, + ANS_MANUAL, +} AutoNumberState; /* Keep track if we're auto-numbering fields */ + +/* Keeps track of our auto-numbering state, and which number field we're on */ +typedef struct { + AutoNumberState an_state; + int an_field_number; +} AutoNumber; + + /* forward declaration for recursion */ static PyObject * build_string(SubString *input, PyObject *args, PyObject *kwargs, - int recursion_depth); + int recursion_depth, AutoNumber *auto_number); @@ -42,6 +55,13 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, /************************** Utility functions ************************/ /************************************************************************/ +static void +AutoNumber_Init(AutoNumber *auto_number) +{ + auto_number->an_state = ANS_INIT; + auto_number->an_field_number = 0; +} + /* fill in a SubString from a pointer and length */ Py_LOCAL_INLINE(void) SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len) @@ -74,6 +94,32 @@ 
SubString_new_object_or_empty(SubString *str) return STRINGLIB_NEW(str->ptr, str->end - str->ptr); } +/* Return 1 if an error has been detected switching between automatic + field numbering and manual field specification, else return 0. Set + ValueError on error. */ +static int +autonumber_state_error(AutoNumberState state, int field_name_is_empty) +{ + if (state == ANS_MANUAL) { + if (field_name_is_empty) { + PyErr_SetString(PyExc_ValueError, "cannot switch from " + "manual field specification to " + "automatic field numbering"); + return 1; + } + } + else { + if (!field_name_is_empty) { + PyErr_SetString(PyExc_ValueError, "cannot switch from " + "automatic field numbering to " + "manual field specification"); + return 1; + } + } + return 0; +} + + /************************************************************************/ /*********** Output string management functions ****************/ /************************************************************************/ @@ -352,11 +398,14 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute, */ static int field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, - Py_ssize_t *first_idx, FieldNameIterator *rest) + Py_ssize_t *first_idx, FieldNameIterator *rest, + AutoNumber *auto_number) { STRINGLIB_CHAR c; STRINGLIB_CHAR *p = ptr; STRINGLIB_CHAR *end = ptr + len; + int field_name_is_empty; + int using_numeric_index; /* find the part up until the first '.' or '[' */ while (p < end) { @@ -380,15 +429,41 @@ field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first, /* see if "first" is an integer, in which case it's used as an index */ *first_idx = get_integer(first); - /* zero length string is an error */ - if (first->ptr >= first->end) { - PyErr_SetString(PyExc_ValueError, "empty field name"); - goto error; + field_name_is_empty = first->ptr >= first->end; + + /* If the field name is omitted or if we have a numeric index + specified, then we're doing numeric indexing into args. 
*/ + using_numeric_index = field_name_is_empty || *first_idx != -1; + + /* We always get here exactly one time for each field we're + processing. And we get here in field order (counting by left + braces). So this is the perfect place to handle automatic field + numbering if the field name is omitted. */ + + /* Check if we need to do the auto-numbering. It's not needed if + we're called from string.Format routines, because it's handled + in that class by itself. */ + if (auto_number) { + /* Initialize our auto numbering state if this is the first + time we're either auto-numbering or manually numbering. */ + if (auto_number->an_state == ANS_INIT && using_numeric_index) + auto_number->an_state = field_name_is_empty ? + ANS_AUTO : ANS_MANUAL; + + /* Make sure our state is consistent with what we're doing + this time through. Only check if we're using a numeric + index. */ + if (using_numeric_index) + if (autonumber_state_error(auto_number->an_state, + field_name_is_empty)) + return 0; + /* Zero length field means we want to do auto-numbering of the + fields. */ + if (field_name_is_empty) + *first_idx = (auto_number->an_field_number)++; } return 1; -error: - return 0; } @@ -398,7 +473,8 @@ error: the entire input string. */ static PyObject * -get_field_object(SubString *input, PyObject *args, PyObject *kwargs) +get_field_object(SubString *input, PyObject *args, PyObject *kwargs, + AutoNumber *auto_number) { PyObject *obj = NULL; int ok; @@ -409,7 +485,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs) FieldNameIterator rest; if (!field_name_split(input->ptr, input->end - input->ptr, &first, - &index, &rest)) { + &index, &rest, auto_number)) { goto error; } @@ -548,14 +624,18 @@ static int parse_field(SubString *str, SubString *field_name, SubString *format_spec, STRINGLIB_CHAR *conversion) { + /* Note this function works if the field name is zero length, + which is good. Zero length field names are handled later, in + field_name_split. 
*/ + STRINGLIB_CHAR c = 0; /* initialize these, as they may be empty */ *conversion = '\0'; SubString_init(format_spec, NULL, 0); - /* search for the field name. it's terminated by the end of the - string, or a ':' or '!' */ + /* Search for the field name. it's terminated by the end of + the string, or a ':' or '!' */ field_name->ptr = str->ptr; while (str->ptr < str->end) { switch (c = *(str->ptr++)) { @@ -598,15 +678,12 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec, } } } - - return 1; - } - else { + else /* end of string, there's no format_spec or conversion */ field_name->end = str->ptr; - return 1; - } + + return 1; } /************************************************************************/ @@ -633,8 +710,8 @@ MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len) string (or something to be expanded) */ static int MarkupIterator_next(MarkupIterator *self, SubString *literal, - SubString *field_name, SubString *format_spec, - STRINGLIB_CHAR *conversion, + int *field_present, SubString *field_name, + SubString *format_spec, STRINGLIB_CHAR *conversion, int *format_spec_needs_expanding) { int at_end; @@ -650,6 +727,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, SubString_init(format_spec, NULL, 0); *conversion = '\0'; *format_spec_needs_expanding = 0; + *field_present = 0; /* No more input, end of iterator. This is the normal exit path. */ @@ -711,6 +789,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, /* this is markup, find the end of the string by counting nested braces. note that this prohibits escaped braces, so that format_specs cannot have braces in them. 
*/ + *field_present = 1; count = 1; start = self->str.ptr; @@ -735,13 +814,6 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal, if (parse_field(&s, field_name, format_spec, conversion) == 0) return 0; - /* a zero length field_name is an error */ - if (field_name->ptr == field_name->end) { - PyErr_SetString(PyExc_ValueError, "zero length field name " - "in format"); - return 0; - } - /* success */ return 2; } @@ -793,13 +865,17 @@ do_conversion(PyObject *obj, STRINGLIB_CHAR conversion) compute the result and write it to output. format_spec_needs_expanding is an optimization. if it's false, just output the string directly, otherwise recursively expand the - format_spec string. */ + format_spec string. + + field_name is allowed to be zero length, in which case we + are doing auto field numbering. +*/ static int output_markup(SubString *field_name, SubString *format_spec, int format_spec_needs_expanding, STRINGLIB_CHAR conversion, OutputString *output, PyObject *args, PyObject *kwargs, - int recursion_depth) + int recursion_depth, AutoNumber *auto_number) { PyObject *tmp = NULL; PyObject *fieldobj = NULL; @@ -808,7 +884,7 @@ output_markup(SubString *field_name, SubString *format_spec, int result = 0; /* convert field_name to an object */ - fieldobj = get_field_object(field_name, args, kwargs); + fieldobj = get_field_object(field_name, args, kwargs, auto_number); if (fieldobj == NULL) goto done; @@ -825,7 +901,8 @@ output_markup(SubString *field_name, SubString *format_spec, /* if needed, recurively compute the format_spec */ if (format_spec_needs_expanding) { - tmp = build_string(format_spec, args, kwargs, recursion_depth-1); + tmp = build_string(format_spec, args, kwargs, recursion_depth-1, + auto_number); if (tmp == NULL) goto done; @@ -859,26 +936,28 @@ done: */ static int do_markup(SubString *input, PyObject *args, PyObject *kwargs, - OutputString *output, int recursion_depth) + OutputString *output, int recursion_depth, AutoNumber *auto_number) { 
MarkupIterator iter; int format_spec_needs_expanding; int result; + int field_present; SubString literal; SubString field_name; SubString format_spec; STRINGLIB_CHAR conversion; MarkupIterator_init(&iter, input->ptr, input->end - input->ptr); - while ((result = MarkupIterator_next(&iter, &literal, &field_name, - &format_spec, &conversion, + while ((result = MarkupIterator_next(&iter, &literal, &field_present, + &field_name, &format_spec, + &conversion, &format_spec_needs_expanding)) == 2) { if (!output_data(output, literal.ptr, literal.end - literal.ptr)) return 0; - if (field_name.ptr != field_name.end) + if (field_present) if (!output_markup(&field_name, &format_spec, format_spec_needs_expanding, conversion, output, - args, kwargs, recursion_depth)) + args, kwargs, recursion_depth, auto_number)) return 0; } return result; @@ -891,7 +970,7 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs, */ static PyObject * build_string(SubString *input, PyObject *args, PyObject *kwargs, - int recursion_depth) + int recursion_depth, AutoNumber *auto_number) { OutputString output; PyObject *result = NULL; @@ -913,7 +992,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs, INITIAL_SIZE_INCREMENT)) goto done; - if (!do_markup(input, args, kwargs, &output, recursion_depth)) { + if (!do_markup(input, args, kwargs, &output, recursion_depth, + auto_number)) { goto done; } @@ -947,8 +1027,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs) */ int recursion_depth = 2; + AutoNumber auto_number; + + AutoNumber_Init(&auto_number); SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self)); - return build_string(&input, args, kwargs, recursion_depth); + return build_string(&input, args, kwargs, recursion_depth, &auto_number); } @@ -993,8 +1076,9 @@ formatteriter_next(formatteriterobject *it) SubString format_spec; STRINGLIB_CHAR conversion; int format_spec_needs_expanding; - int result = MarkupIterator_next(&it->it_markup, &literal, 
&field_name, - &format_spec, &conversion, + int field_present; + int result = MarkupIterator_next(&it->it_markup, &literal, &field_present, + &field_name, &format_spec, &conversion, &format_spec_needs_expanding); /* all of the SubString objects point into it->str, so no @@ -1009,7 +1093,6 @@ formatteriter_next(formatteriterobject *it) PyObject *format_spec_str = NULL; PyObject *conversion_str = NULL; PyObject *tuple = NULL; - int has_field = field_name.ptr != field_name.end; literal_str = SubString_new_object(&literal); if (literal_str == NULL) @@ -1021,7 +1104,7 @@ formatteriter_next(formatteriterobject *it) /* if field_name is non-zero length, return a string for format_spec (even if zero length), else return None */ - format_spec_str = (has_field ? + format_spec_str = (field_present ? SubString_new_object_or_empty : SubString_new_object)(&format_spec); if (format_spec_str == NULL) @@ -1245,9 +1328,11 @@ formatter_field_name_split(STRINGLIB_OBJECT *self) Py_INCREF(self); it->str = self; + /* Pass in auto_number = NULL. We'll return an empty string for + first_obj in that case. */ if (!field_name_split(STRINGLIB_STR(self), STRINGLIB_LEN(self), - &first, &first_idx, &it->it_field)) + &first, &first_idx, &it->it_field, NULL)) goto done; /* first becomes an integer, if possible; else a string */ |