Issue 5237, Allow auto-numbered replacement fields in str.format() strings.

For simple uses of str.format(), this makes the typing easier. Hopefully this will help in the adoption of str.format().

For example:
    'The {} is {}'.format('sky', 'blue')

You can mix and match auto-numbering and named replacement fields:
    'The {} is {color}'.format('sky', color='blue')

But you can't mix and match auto-numbering and specified numbering:
    'The {0} is {}'.format('sky', 'blue')
    ValueError: cannot switch from manual field specification to automatic field numbering

Will port to 3.1.
author: Eric Smith <eric@trueblade.com> 2009-03-14 11:57:26 (GMT)
committer: Eric Smith <eric@trueblade.com> 2009-03-14 11:57:26 (GMT)
commit: 6f42edb6821462c9ce02c6c4e2f57731b43956ad (patch)
tree: 22a648f118e175d637901f3575d3549e975eb445
parent: feeafff0529c8fa0b5ab6a3086464cce8fd1b3fc (diff)
download: cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.zip
cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.tar.gz
cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.tar.bz2
4 files changed, 198 insertions, 50 deletions
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
index 044711c..51d2680 100644
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -347,9 +347,9 @@ class StrTest(
         self.assertRaises(ValueError, "{0!}".format, 0)
         self.assertRaises(ValueError, "{0!rs}".format, 0)
         self.assertRaises(ValueError, "{!}".format)
-        self.assertRaises(ValueError, "{:}".format)
-        self.assertRaises(ValueError, "{:s}".format)
-        self.assertRaises(ValueError, "{}".format)
+        self.assertRaises(IndexError, "{:}".format)
+        self.assertRaises(IndexError, "{:s}".format)
+        self.assertRaises(IndexError, "{}".format)
 
         # can't have a replacement on the field name portion
         self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
@@ -364,6 +364,36 @@ class StrTest(
         self.assertRaises(ValueError, format, "", "-")
         self.assertRaises(ValueError, "{0:=s}".format, '')
 
+    def test_format_auto_numbering(self):
+        class C:
+            def __init__(self, x=100):
+                self._x = x
+            def __format__(self, spec):
+                return spec
+
+        self.assertEqual('{}'.format(10), '10')
+        self.assertEqual('{:5}'.format('s'), 's    ')
+        self.assertEqual('{!r}'.format('s'), "'s'")
+        self.assertEqual('{._x}'.format(C(10)), '10')
+        self.assertEqual('{[1]}'.format([1, 2]), '2')
+        self.assertEqual('{[a]}'.format({'a':4, 'b':2}), '4')
+        self.assertEqual('a{}b{}c'.format(0, 1), 'a0b1c')
+
+        self.assertEqual('a{:{}}b'.format('x', '^10'), 'a    x     b')
+        self.assertEqual('a{:{}x}b'.format(20, '#'), 'a0x14b')
+
+        # can't mix and match numbering and auto-numbering
+        self.assertRaises(ValueError, '{}{1}'.format, 1, 2)
+        self.assertRaises(ValueError, '{1}{}'.format, 1, 2)
+        self.assertRaises(ValueError, '{:{1}}'.format, 1, 2)
+        self.assertRaises(ValueError, '{0:{}}'.format, 1, 2)
+
+        # can mix and match auto-numbering and named
+        self.assertEqual('{f}{}'.format(4, f='test'), 'test4')
+        self.assertEqual('{}{f}'.format(4, f='test'), '4test')
+        self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
+        self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
+
     def test_buffer_is_readonly(self):
         self.assertRaises(TypeError, sys.stdin.readinto, b"")
 
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 70e4787..356f570 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1087,9 +1087,9 @@ class UnicodeTest(
         self.assertRaises(ValueError, "{0!}".format, 0)
         self.assertRaises(ValueError, "{0!rs}".format, 0)
         self.assertRaises(ValueError, "{!}".format)
-        self.assertRaises(ValueError, "{:}".format)
-        self.assertRaises(ValueError, "{:s}".format)
-        self.assertRaises(ValueError, "{}".format)
+        self.assertRaises(IndexError, "{:}".format)
+        self.assertRaises(IndexError, "{:s}".format)
+        self.assertRaises(IndexError, "{}".format)
 
         # can't have a replacement on the field name portion
         self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
@@ -1113,6 +1113,36 @@ class UnicodeTest(
         #  will fail
         self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
 
+    def test_format_auto_numbering(self):
+        class C:
+            def __init__(self, x=100):
+                self._x = x
+            def __format__(self, spec):
+                return spec
+
+        self.assertEqual(u'{}'.format(10), u'10')
+        self.assertEqual(u'{:5}'.format('s'), u's    ')
+        self.assertEqual(u'{!r}'.format('s'), u"'s'")
+        self.assertEqual(u'{._x}'.format(C(10)), u'10')
+        self.assertEqual(u'{[1]}'.format([1, 2]), u'2')
+        self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), u'4')
+        self.assertEqual(u'a{}b{}c'.format(0, 1), u'a0b1c')
+
+        self.assertEqual(u'a{:{}}b'.format('x', '^10'), u'a    x     b')
+        self.assertEqual(u'a{:{}x}b'.format(20, '#'), u'a0x14b')
+
+        # can't mix and match numbering and auto-numbering
+        self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
+        self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
+        self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
+        self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)
+
+        # can mix and match auto-numbering and named
+        self.assertEqual(u'{f}{}'.format(4, f='test'), u'test4')
+        self.assertEqual(u'{}{f}'.format(4, f='test'), u'4test')
+        self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), u' 1g3')
+        self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), u' 14g')
+
     def test_raiseMemError(self):
         # Ensure that the freelist contains a consistent object, even
         # when a string allocation fails with a MemoryError.
diff --git a/Misc/NEWS b/Misc/NEWS
index d5ddd10..9432295 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
 Core and Builtins
 -----------------
 
+- Issue #5237: Allow auto-numbered fields in str.format(). For
+  example: '{} {}'.format(1, 2) == '1 2'.
+
 - Issue #3652: Make the 'line' argument for warnings.showwarning() a
   requirement.  Means the DeprecationWarning from Python 2.6 can go away.
 
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h
index 600e6b0..cc7c66f 100644
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -31,10 +31,23 @@ typedef struct {
 } SubString;
 
 
+typedef enum {
+    ANS_INIT,
+    ANS_AUTO,
+    ANS_MANUAL,
+} AutoNumberState;   /* Keep track if we're auto-numbering fields */
+
+/* Keeps track of our auto-numbering state, and which number field we're on */
+typedef struct {
+    AutoNumberState an_state;
+    int an_field_number;
+} AutoNumber;
+
+
 /* forward declaration for recursion */
 static PyObject *
 build_string(SubString *input, PyObject *args, PyObject *kwargs,
-             int recursion_depth);
+             int recursion_depth, AutoNumber *auto_number);
 
 
 
@@ -42,6 +55,13 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
 /**************************  Utility  functions  ************************/
 /************************************************************************/
 
+static void
+AutoNumber_Init(AutoNumber *auto_number)
+{
+    auto_number->an_state = ANS_INIT;
+    auto_number->an_field_number = 0;
+}
+
 /* fill in a SubString from a pointer and length */
 Py_LOCAL_INLINE(void)
 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
@@ -74,6 +94,32 @@ SubString_new_object_or_empty(SubString *str)
     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
 }
 
+/* Return 1 if an error has been detected switching between automatic
+   field numbering and manual field specification, else return 0. Set
+   ValueError on error. */
+static int
+autonumber_state_error(AutoNumberState state, int field_name_is_empty)
+{
+    if (state == ANS_MANUAL) {
+        if (field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "manual field specification to "
+                            "automatic field numbering");
+            return 1;
+        }
+    }
+    else {
+        if (!field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "automatic field numbering to "
+                            "manual field specification");
+            return 1;
+        }
+    }
+    return 0;
+}
+
+
 /************************************************************************/
 /***********    Output string management functions       ****************/
 /************************************************************************/
@@ -352,11 +398,14 @@ FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
 */
 static int
 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
-                 Py_ssize_t *first_idx, FieldNameIterator *rest)
+                 Py_ssize_t *first_idx, FieldNameIterator *rest,
+                 AutoNumber *auto_number)
 {
     STRINGLIB_CHAR c;
     STRINGLIB_CHAR *p = ptr;
     STRINGLIB_CHAR *end = ptr + len;
+    int field_name_is_empty;
+    int using_numeric_index;
 
     /* find the part up until the first '.' or '[' */
     while (p < end) {
@@ -380,15 +429,41 @@ field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
     /* see if "first" is an integer, in which case it's used as an index */
     *first_idx = get_integer(first);
 
-    /* zero length string is an error */
-    if (first->ptr >= first->end) {
-        PyErr_SetString(PyExc_ValueError, "empty field name");
-        goto error;
+    field_name_is_empty = first->ptr >= first->end;
+
+    /* If the field name is omitted or if we have a numeric index
+       specified, then we're doing numeric indexing into args. */
+    using_numeric_index = field_name_is_empty || *first_idx != -1;
+
+    /* We always get here exactly one time for each field we're
+       processing. And we get here in field order (counting by left
+       braces). So this is the perfect place to handle automatic field
+       numbering if the field name is omitted. */
+
+    /* Check if we need to do the auto-numbering. It's not needed if
+       we're called from string.Formatter routines, because it's handled
+       in that class by itself. */
+    if (auto_number) {
+        /* Initialize our auto numbering state if this is the first
+           time we're either auto-numbering or manually numbering. */
+        if (auto_number->an_state == ANS_INIT && using_numeric_index)
+            auto_number->an_state = field_name_is_empty ?
+                ANS_AUTO : ANS_MANUAL;
+
+        /* Make sure our state is consistent with what we're doing
+           this time through. Only check if we're using a numeric
+           index. */
+        if (using_numeric_index)
+            if (autonumber_state_error(auto_number->an_state,
+                                       field_name_is_empty))
+                return 0;
+        /* Zero length field means we want to do auto-numbering of the
+           fields. */
+        if (field_name_is_empty)
+            *first_idx = (auto_number->an_field_number)++;
     }
 
     return 1;
-error:
-    return 0;
 }
 
 
@@ -398,7 +473,8 @@ error:
     the entire input string.
 */
 static PyObject *
-get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
+                 AutoNumber *auto_number)
 {
     PyObject *obj = NULL;
     int ok;
@@ -409,7 +485,7 @@ get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
     FieldNameIterator rest;
 
     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
-                          &index, &rest)) {
+                          &index, &rest, auto_number)) {
         goto error;
     }
 
@@ -557,14 +633,18 @@ static int
 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
             STRINGLIB_CHAR *conversion)
 {
+    /* Note this function works if the field name is zero length,
+       which is good.  Zero length field names are handled later, in
+       field_name_split. */
+
     STRINGLIB_CHAR c = 0;
 
     /* initialize these, as they may be empty */
     *conversion = '\0';
     SubString_init(format_spec, NULL, 0);
 
-    /* search for the field name.  it's terminated by the end of the
-       string, or a ':' or '!' */
+    /* Search for the field name.  it's terminated by the end of
+       the string, or a ':' or '!' */
     field_name->ptr = str->ptr;
     while (str->ptr < str->end) {
         switch (c = *(str->ptr++)) {
@@ -607,15 +687,12 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
                 }
             }
         }
-
-        return 1;
-
     }
-    else {
+    else
         /* end of string, there's no format_spec or conversion */
         field_name->end = str->ptr;
-        return 1;
-    }
+
+    return 1;
 }
 
 /************************************************************************/
@@ -642,8 +719,8 @@ MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
    string (or something to be expanded) */
 static int
 MarkupIterator_next(MarkupIterator *self, SubString *literal,
-                    SubString *field_name, SubString *format_spec,
-                    STRINGLIB_CHAR *conversion,
+                    int *field_present, SubString *field_name,
+                    SubString *format_spec, STRINGLIB_CHAR *conversion,
                     int *format_spec_needs_expanding)
 {
     int at_end;
@@ -659,6 +736,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
     SubString_init(format_spec, NULL, 0);
     *conversion = '\0';
     *format_spec_needs_expanding = 0;
+    *field_present = 0;
 
     /* No more input, end of iterator.  This is the normal exit
        path. */
@@ -720,6 +798,7 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
     /* this is markup, find the end of the string by counting nested
        braces.  note that this prohibits escaped braces, so that
        format_specs cannot have braces in them. */
+    *field_present = 1;
     count = 1;
 
     start = self->str.ptr;
@@ -744,13 +823,6 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
                 if (parse_field(&s, field_name, format_spec, conversion) == 0)
                     return 0;
 
-                /* a zero length field_name is an error */
-                if (field_name->ptr == field_name->end) {
-                    PyErr_SetString(PyExc_ValueError, "zero length field name "
-                                    "in format");
-                    return 0;
-                }
-
                 /* success */
                 return 2;
             }
@@ -798,13 +870,17 @@ do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
    compute the result and write it to output.
    format_spec_needs_expanding is an optimization.  if it's false,
    just output the string directly, otherwise recursively expand the
-   format_spec string. */
+   format_spec string.
+
+   field_name is allowed to be zero length, in which case we
+   are doing auto field numbering.
+*/
 
 static int
 output_markup(SubString *field_name, SubString *format_spec,
               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
               OutputString *output, PyObject *args, PyObject *kwargs,
-              int recursion_depth)
+              int recursion_depth, AutoNumber *auto_number)
 {
     PyObject *tmp = NULL;
     PyObject *fieldobj = NULL;
@@ -813,7 +889,7 @@ output_markup(SubString *field_name, SubString *format_spec,
     int result = 0;
 
     /* convert field_name to an object */
-    fieldobj = get_field_object(field_name, args, kwargs);
+    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
     if (fieldobj == NULL)
         goto done;
 
@@ -830,7 +906,8 @@ output_markup(SubString *field_name, SubString *format_spec,
 
     /* if needed, recurively compute the format_spec */
     if (format_spec_needs_expanding) {
-        tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
+        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
+                           auto_number);
         if (tmp == NULL)
             goto done;
 
@@ -864,26 +941,28 @@ done:
 */
 static int
 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
-          OutputString *output, int recursion_depth)
+          OutputString *output, int recursion_depth, AutoNumber *auto_number)
 {
     MarkupIterator iter;
     int format_spec_needs_expanding;
     int result;
+    int field_present;
     SubString literal;
     SubString field_name;
     SubString format_spec;
     STRINGLIB_CHAR conversion;
 
     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
-    while ((result = MarkupIterator_next(&iter, &literal, &field_name,
-                                         &format_spec, &conversion,
+    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
+                                         &field_name, &format_spec,
+                                         &conversion,
                                          &format_spec_needs_expanding)) == 2) {
         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
             return 0;
-        if (field_name.ptr != field_name.end)
+        if (field_present)
             if (!output_markup(&field_name, &format_spec,
                                format_spec_needs_expanding, conversion, output,
-                               args, kwargs, recursion_depth))
+                               args, kwargs, recursion_depth, auto_number))
                 return 0;
     }
     return result;
@@ -896,7 +975,7 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
 */
 static PyObject *
 build_string(SubString *input, PyObject *args, PyObject *kwargs,
-             int recursion_depth)
+             int recursion_depth, AutoNumber *auto_number)
 {
     OutputString output;
     PyObject *result = NULL;
@@ -918,7 +997,8 @@ build_string(SubString *input, PyObject *args, PyObject *kwargs,
                            INITIAL_SIZE_INCREMENT))
         goto done;
 
-    if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
+    if (!do_markup(input, args, kwargs, &output, recursion_depth,
+                   auto_number)) {
         goto done;
     }
 
@@ -952,8 +1032,11 @@ do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
     */
     int recursion_depth = 2;
 
+    AutoNumber auto_number;
+
+    AutoNumber_Init(&auto_number);
     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
-    return build_string(&input, args, kwargs, recursion_depth);
+    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
 }
 
 
@@ -998,8 +1081,9 @@ formatteriter_next(formatteriterobject *it)
     SubString format_spec;
     STRINGLIB_CHAR conversion;
     int format_spec_needs_expanding;
-    int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
-                                     &format_spec, &conversion,
+    int field_present;
+    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
+                                     &field_name, &format_spec, &conversion,
                                      &format_spec_needs_expanding);
 
     /* all of the SubString objects point into it->str, so no
@@ -1014,7 +1098,6 @@ formatteriter_next(formatteriterobject *it)
         PyObject *format_spec_str = NULL;
         PyObject *conversion_str = NULL;
         PyObject *tuple = NULL;
-        int has_field = field_name.ptr != field_name.end;
 
         literal_str = SubString_new_object(&literal);
         if (literal_str == NULL)
@@ -1026,7 +1109,7 @@ formatteriter_next(formatteriterobject *it)
 
         /* if field_name is non-zero length, return a string for
            format_spec (even if zero length), else return None */
-        format_spec_str = (has_field ?
+        format_spec_str = (field_present ?
                            SubString_new_object_or_empty :
                            SubString_new_object)(&format_spec);
         if (format_spec_str == NULL)
@@ -1250,9 +1333,11 @@ formatter_field_name_split(STRINGLIB_OBJECT *self)
     Py_INCREF(self);
     it->str = self;
 
+    /* Pass in auto_number = NULL. We'll return an empty string for
+       first_obj in that case. */
     if (!field_name_split(STRINGLIB_STR(self),
                           STRINGLIB_LEN(self),
-                          &first, &first_idx, &it->it_field))
+                          &first, &first_idx, &it->it_field, NULL))
         goto done;
 
     /* first becomes an integer, if possible; else a string */
author	Eric Smith <eric@trueblade.com>	2009-03-14 11:57:26 (GMT)
committer	Eric Smith <eric@trueblade.com>	2009-03-14 11:57:26 (GMT)
commit	6f42edb6821462c9ce02c6c4e2f57731b43956ad (patch)
tree	22a648f118e175d637901f3575d3549e975eb445
parent	feeafff0529c8fa0b5ab6a3086464cce8fd1b3fc (diff)
download	cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.zip cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.tar.gz cpython-6f42edb6821462c9ce02c6c4e2f57731b43956ad.tar.bz2