summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Smith <eric@trueblade.com>2009-03-14 11:57:26 (GMT)
committerEric Smith <eric@trueblade.com>2009-03-14 11:57:26 (GMT)
commit6f42edb6821462c9ce02c6c4e2f57731b43956ad (patch)
tree22a648f118e175d637901f3575d3549e975eb445
parentfeeafff0529c8fa0b5ab6a3086464cce8fd1b3fc (diff)
downloadcpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.zip
cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.tar.gz
cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.tar.bz2
Issue 5237, Allow auto-numbered replacement fields in str.format() strings.
For simple uses of str.format(), this makes the typing easier. Hopefully this will help in the adoption of str.format(). For example: 'The {} is {}'.format('sky', 'blue'). You can mix and match auto-numbering and named replacement fields: 'The {} is {color}'.format('sky', color='blue'). But you can't mix and match auto-numbering and specified numbering: 'The {0} is {}'.format('sky', 'blue') raises ValueError: cannot switch from manual field specification to automatic field numbering. Will port to 3.1.
-rw-r--r--Lib/test/test_str.py36
-rw-r--r--Lib/test/test_unicode.py36
-rw-r--r--Misc/NEWS3
-rw-r--r--Objects/stringlib/string_format.h173
4 files changed, 198 insertions, 50 deletions
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
index 044711c..51d2680 100644
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -347,9 +347,9 @@ class StrTest(
self.assertRaises(ValueError, "{0!}".format, 0)
self.assertRaises(ValueError, "{0!rs}".format, 0)
self.assertRaises(ValueError, "{!}".format)
- self.assertRaises(ValueError, "{:}".format)
- self.assertRaises(ValueError, "{:s}".format)
- self.assertRaises(ValueError, "{}".format)
+ self.assertRaises(IndexError, "{:}".format)
+ self.assertRaises(IndexError, "{:s}".format)
+ self.assertRaises(IndexError, "{}".format)
# can't have a replacement on the field name portion
self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
@@ -364,6 +364,36 @@ class StrTest(
self.assertRaises(ValueError, format, "", "-")
self.assertRaises(ValueError, "{0:=s}".format, '')
+ def test_format_auto_numbering(self):
+ class C:
+ def __init__(self, x=100):
+ self._x = x
+ def __format__(self, spec):
+ return spec
+
+ self.assertEqual('{}'.format(10), '10')
+ self.assertEqual('{:5}'.format('s'), 's ')
+ self.assertEqual('{!r}'.format('s'), "'s'")
+ self.assertEqual('{._x}'.format(C(10)), '10')
+ self.assertEqual('{[1]}'.format([1, 2]), '2')
+ self.assertEqual('{[a]}'.format({'a':4, 'b':2}), '4')
+ self.assertEqual('a{}b{}c'.format(0, 1), 'a0b1c')
+
+ self.assertEqual('a{:{}}b'.format('x', '^10'), 'a x b')
+ self.assertEqual('a{:{}x}b'.format(20, '#'), 'a0x14b')
+
+ # can't mix and match numbering and auto-numbering
+ self.assertRaises(ValueError, '{}{1}'.format, 1, 2)
+ self.assertRaises(ValueError, '{1}{}'.format, 1, 2)
+ self.assertRaises(ValueError, '{:{1}}'.format, 1, 2)
+ self.assertRaises(ValueError, '{0:{}}'.format, 1, 2)
+
+ # can mix and match auto-numbering and named
+ self.assertEqual('{f}{}'.format(4, f='test'), 'test4')
+ self.assertEqual('{}{f}'.format(4, f='test'), '4test')
+ self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
+ self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
+
def test_buffer_is_readonly(self):
self.assertRaises(TypeError, sys.stdin.readinto, b"")
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 70e4787..356f570 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1087,9 +1087,9 @@ class UnicodeTest(
self.assertRaises(ValueError, "{0!}".format, 0)
self.assertRaises(ValueError, "{0!rs}".format, 0)
self.assertRaises(ValueError, "{!}".format)
- self.assertRaises(ValueError, "{:}".format)
- self.assertRaises(ValueError, "{:s}".format)
- self.assertRaises(ValueError, "{}".format)
+ self.assertRaises(IndexError, "{:}".format)
+ self.assertRaises(IndexError, "{:s}".format)
+ self.assertRaises(IndexError, "{}".format)
# can't have a replacement on the field name portion
self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
@@ -1113,6 +1113,36 @@ class UnicodeTest(
# will fail
self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
+ def test_format_auto_numbering(self):
+ class C:
+ def __init__(self, x=100):
+ self._x = x
+ def __format__(self, spec):
+ return spec
+
+ self.assertEqual(u'{}'.format(10), u'10')
+ self.assertEqual(u'{:5}'.format('s'), u's ')
+ self.assertEqual(u'{!r}'.format('s'), u"'s'")
+ self.assertEqual(u'{._x}'.format(C(10)), u'10')
+ self.assertEqual(u'{[1]}'.format([1, 2]), u'2')
+ self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), u'4')
+ self.assertEqual(u'a{}b{}c'.format(0, 1), u'a0b1c')
+
+ self.assertEqual(u'a{:{}}b'.format('x', '^10'), u'a x b')
+ self.assertEqual(u'a{:{}x}b'.format(20, '#'), u'a0x14b')
+
+ # can't mix and match numbering and auto-numbering
+ self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
+ self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
+ self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
+ self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)
+
+ # can mix and match auto-numbering and named
+ self.assertEqual(u'{f}{}'.format(4, f='test'), u'test4')
+ self.assertEqual(u'{}{f}'.format(4, f='test'), u'4test')
+ self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), u' 1g3')
+ self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), u' 14g')
+
def test_raiseMemError(self):
# Ensure that the freelist contains a consistent object, even
# when a string allocation fails with a MemoryError.
diff --git a/Misc/NEWS b/Misc/NEWS
index d5ddd10..9432295 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
Core and Builtins
-----------------
+- Issue #5237: Allow auto-numbered fields in str.format(). For
+ example: '{} {}'.format(1, 2) == '1 2'.
+
- Issue #3652: Make the 'line' argument for warnings.showwarning() a
requirement. Means the DeprecationWarning from Python 2.6 can go away.
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h
index 600e6b0..cc7c66f 100644
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -31,10 +31,23 @@ typedef struct {
} SubString;
+typedef enum {
+ ANS_INIT,
+ ANS_AUTO,
+ ANS_MANUAL,
+} AutoNumberState; /* Keep track if we're auto-numbering fields */
+
+/* Keeps track of our auto-numbering state, and which number field we're on */
+typedef struct {
+ AutoNumberState an_state;
+ int an_field_number;
+} AutoNumber;
+
+
/* forward declaration for recursion */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
- int recursion_depth);
+ int recursion_depth, AutoNumber *auto_number);
@@ -42,6 +55,13 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
/************************** Utility functions ************************/
/************************************************************************/
+static void
+AutoNumber_Init(AutoNumber *auto_number)
+{
+ auto_number->an_state = ANS_INIT;
+ auto_number->an_field_number = 0;
+}
+
/* fill in a SubString from a pointer and length */
Py_LOCAL_INLINE(void)
SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
@@ -74,6 +94,32 @@ SubString_new_object_or_empty(SubString *str)
return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
}
+/* Return 1 if an error has been detected switching between automatic
+ field numbering and manual field specification, else return 0. Set
+ ValueError on error. */
+static int
+autonumber_state_error(AutoNumberState state, int field_name_is_empty)
+{
+ if (state == ANS_MANUAL) {
+ if (field_name_is_empty) {
+ PyErr_SetString(PyExc_ValueError, "cannot switch from "
+ "manual field specification to "
+ "automatic field numbering");
+ return 1;
+ }
+ }
+ else {
+ if (!field_name_is_empty) {
+ PyErr_SetString(PyExc_ValueError, "cannot switch from "
+ "automatic field numbering to "
+ "manual field specification");
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
/************************************************************************/
/*********** Output string management functions ****************/
/************************************************************************/
@@ -352,11 +398,14 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
*/
static int
field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
- Py_ssize_t *first_idx, FieldNameIterator *rest)
+ Py_ssize_t *first_idx, FieldNameIterator *rest,
+ AutoNumber *auto_number)
{
STRINGLIB_CHAR c;
STRINGLIB_CHAR *p = ptr;
STRINGLIB_CHAR *end = ptr + len;
+ int field_name_is_empty;
+ int using_numeric_index;
/* find the part up until the first '.' or '[' */
while (p < end) {
@@ -380,15 +429,41 @@ field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
/* see if "first" is an integer, in which case it's used as an index */
*first_idx = get_integer(first);
- /* zero length string is an error */
- if (first->ptr >= first->end) {
- PyErr_SetString(PyExc_ValueError, "empty field name");
- goto error;
+ field_name_is_empty = first->ptr >= first->end;
+
+ /* If the field name is omitted or if we have a numeric index
+ specified, then we're doing numeric indexing into args. */
+ using_numeric_index = field_name_is_empty || *first_idx != -1;
+
+ /* We always get here exactly one time for each field we're
+ processing. And we get here in field order (counting by left
+ braces). So this is the perfect place to handle automatic field
+ numbering if the field name is omitted. */
+
+ /* Check if we need to do the auto-numbering. It's not needed if
+ we're called from string.Format routines, because it's handled
+ in that class by itself. */
+ if (auto_number) {
+ /* Initialize our auto numbering state if this is the first
+ time we're either auto-numbering or manually numbering. */
+ if (auto_number->an_state == ANS_INIT && using_numeric_index)
+ auto_number->an_state = field_name_is_empty ?
+ ANS_AUTO : ANS_MANUAL;
+
+ /* Make sure our state is consistent with what we're doing
+ this time through. Only check if we're using a numeric
+ index. */
+ if (using_numeric_index)
+ if (autonumber_state_error(auto_number->an_state,
+ field_name_is_empty))
+ return 0;
+ /* Zero length field means we want to do auto-numbering of the
+ fields. */
+ if (field_name_is_empty)
+ *first_idx = (auto_number->an_field_number)++;
}
return 1;
-error:
- return 0;
}
@@ -398,7 +473,8 @@ error:
the entire input string.
*/
static PyObject *
-get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
+ AutoNumber *auto_number)
{
PyObject *obj = NULL;
int ok;
@@ -409,7 +485,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
FieldNameIterator rest;
if (!field_name_split(input->ptr, input->end - input->ptr, &first,
- &index, &rest)) {
+ &index, &rest, auto_number)) {
goto error;
}
@@ -557,14 +633,18 @@ static int
parse_field(SubString *str, SubString *field_name, SubString *format_spec,
STRINGLIB_CHAR *conversion)
{
+ /* Note this function works if the field name is zero length,
+ which is good. Zero length field names are handled later, in
+ field_name_split. */
+
STRINGLIB_CHAR c = 0;
/* initialize these, as they may be empty */
*conversion = '\0';
SubString_init(format_spec, NULL, 0);
- /* search for the field name. it's terminated by the end of the
- string, or a ':' or '!' */
+ /* Search for the field name. it's terminated by the end of
+ the string, or a ':' or '!' */
field_name->ptr = str->ptr;
while (str->ptr < str->end) {
switch (c = *(str->ptr++)) {
@@ -607,15 +687,12 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
}
}
}
-
- return 1;
-
}
- else {
+ else
/* end of string, there's no format_spec or conversion */
field_name->end = str->ptr;
- return 1;
- }
+
+ return 1;
}
/************************************************************************/
@@ -642,8 +719,8 @@ MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
string (or something to be expanded) */
static int
MarkupIterator_next(MarkupIterator *self, SubString *literal,
- SubString *field_name, SubString *format_spec,
- STRINGLIB_CHAR *conversion,
+ int *field_present, SubString *field_name,
+ SubString *format_spec, STRINGLIB_CHAR *conversion,
int *format_spec_needs_expanding)
{
int at_end;
@@ -659,6 +736,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
SubString_init(format_spec, NULL, 0);
*conversion = '\0';
*format_spec_needs_expanding = 0;
+ *field_present = 0;
/* No more input, end of iterator. This is the normal exit
path. */
@@ -720,6 +798,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
/* this is markup, find the end of the string by counting nested
braces. note that this prohibits escaped braces, so that
format_specs cannot have braces in them. */
+ *field_present = 1;
count = 1;
start = self->str.ptr;
@@ -744,13 +823,6 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
if (parse_field(&s, field_name, format_spec, conversion) == 0)
return 0;
- /* a zero length field_name is an error */
- if (field_name->ptr == field_name->end) {
- PyErr_SetString(PyExc_ValueError, "zero length field name "
- "in format");
- return 0;
- }
-
/* success */
return 2;
}
@@ -798,13 +870,17 @@ do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
compute the result and write it to output.
format_spec_needs_expanding is an optimization. if it's false,
just output the string directly, otherwise recursively expand the
- format_spec string. */
+ format_spec string.
+
+ field_name is allowed to be zero length, in which case we
+ are doing auto field numbering.
+*/
static int
output_markup(SubString *field_name, SubString *format_spec,
int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
OutputString *output, PyObject *args, PyObject *kwargs,
- int recursion_depth)
+ int recursion_depth, AutoNumber *auto_number)
{
PyObject *tmp = NULL;
PyObject *fieldobj = NULL;
@@ -813,7 +889,7 @@ output_markup(SubString *field_name, SubString *format_spec,
int result = 0;
/* convert field_name to an object */
- fieldobj = get_field_object(field_name, args, kwargs);
+ fieldobj = get_field_object(field_name, args, kwargs, auto_number);
if (fieldobj == NULL)
goto done;
@@ -830,7 +906,8 @@ output_markup(SubString *field_name, SubString *format_spec,
/* if needed, recurively compute the format_spec */
if (format_spec_needs_expanding) {
- tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
+ tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
+ auto_number);
if (tmp == NULL)
goto done;
@@ -864,26 +941,28 @@ done:
*/
static int
do_markup(SubString *input, PyObject *args, PyObject *kwargs,
- OutputString *output, int recursion_depth)
+ OutputString *output, int recursion_depth, AutoNumber *auto_number)
{
MarkupIterator iter;
int format_spec_needs_expanding;
int result;
+ int field_present;
SubString literal;
SubString field_name;
SubString format_spec;
STRINGLIB_CHAR conversion;
MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
- while ((result = MarkupIterator_next(&iter, &literal, &field_name,
- &format_spec, &conversion,
+ while ((result = MarkupIterator_next(&iter, &literal, &field_present,
+ &field_name, &format_spec,
+ &conversion,
&format_spec_needs_expanding)) == 2) {
if (!output_data(output, literal.ptr, literal.end - literal.ptr))
return 0;
- if (field_name.ptr != field_name.end)
+ if (field_present)
if (!output_markup(&field_name, &format_spec,
format_spec_needs_expanding, conversion, output,
- args, kwargs, recursion_depth))
+ args, kwargs, recursion_depth, auto_number))
return 0;
}
return result;
@@ -896,7 +975,7 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
*/
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
- int recursion_depth)
+ int recursion_depth, AutoNumber *auto_number)
{
OutputString output;
PyObject *result = NULL;
@@ -918,7 +997,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
INITIAL_SIZE_INCREMENT))
goto done;
- if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
+ if (!do_markup(input, args, kwargs, &output, recursion_depth,
+ auto_number)) {
goto done;
}
@@ -952,8 +1032,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
*/
int recursion_depth = 2;
+ AutoNumber auto_number;
+
+ AutoNumber_Init(&auto_number);
SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
- return build_string(&input, args, kwargs, recursion_depth);
+ return build_string(&input, args, kwargs, recursion_depth, &auto_number);
}
@@ -998,8 +1081,9 @@ formatteriter_next(formatteriterobject *it)
SubString format_spec;
STRINGLIB_CHAR conversion;
int format_spec_needs_expanding;
- int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
- &format_spec, &conversion,
+ int field_present;
+ int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
+ &field_name, &format_spec, &conversion,
&format_spec_needs_expanding);
/* all of the SubString objects point into it->str, so no
@@ -1014,7 +1098,6 @@ formatteriter_next(formatteriterobject *it)
PyObject *format_spec_str = NULL;
PyObject *conversion_str = NULL;
PyObject *tuple = NULL;
- int has_field = field_name.ptr != field_name.end;
literal_str = SubString_new_object(&literal);
if (literal_str == NULL)
@@ -1026,7 +1109,7 @@ formatteriter_next(formatteriterobject *it)
/* if field_name is non-zero length, return a string for
format_spec (even if zero length), else return None */
- format_spec_str = (has_field ?
+ format_spec_str = (field_present ?
SubString_new_object_or_empty :
SubString_new_object)(&format_spec);
if (format_spec_str == NULL)
@@ -1250,9 +1333,11 @@ formatter_field_name_split(STRINGLIB_OBJECT *self)
Py_INCREF(self);
it->str = self;
+ /* Pass in auto_number = NULL. We'll return an empty string for
+ first_obj in that case. */
if (!field_name_split(STRINGLIB_STR(self),
STRINGLIB_LEN(self),
- &first, &first_idx, &it->it_field))
+ &first, &first_idx, &it->it_field, NULL))
goto done;
/* first becomes an integer, if possible; else a string */