bpo-30529: Fix errors for invalid whitespaces in f-string subexpressions. (#1888)

'invalid character in identifier' is now raised instead of 'f-string: empty expression not allowed' if a subexpression contains only whitespace characters that the Python parser does not accept as whitespace.
author: Serhiy Storchaka <storchaka@gmail.com> 2017-06-08 20:43:54 (GMT)
committer: GitHub <noreply@github.com> 2017-06-08 20:43:54 (GMT)
commit: 2e9cd5825c5ccdbb6f65a57c0c7941078e003c14 (patch)
tree: 1f39984ce8de33ad6f1da69e34e5e09b9161823c
parent: 29adc13bd797d9c9e7fcb893a7c49ce7f7ad388c (diff)
download: cpython-2e9cd5825c5ccdbb6f65a57c0c7941078e003c14.zip
cpython-2e9cd5825c5ccdbb6f65a57c0c7941078e003c14.tar.gz
cpython-2e9cd5825c5ccdbb6f65a57c0c7941078e003c14.tar.bz2
2 files changed, 17 insertions, 24 deletions
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index 2573002..b398704 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -280,6 +280,10 @@ f'{a * x()}'"""
                              "f'{10:{ }}'",
                              "f' { } '",
 
+                             # The Python parser ignores also the following
+                             # whitespace characters in additional to a space.
+                             "f'''{\t\f\r\n}'''",
+
                              # Catch the empty expression before the
                              #  invalid conversion.
                              "f'{!x}'",
@@ -300,6 +304,12 @@ f'{a * x()}'"""
                              "f'{:x'",
                              ])
 
+        # Different error message is raised for other whitespace characters.
+        self.assertAllRaise(SyntaxError, 'invalid character in identifier',
+                            ["f'''{\xa0}'''",
+                             "\xa0",
+                             ])
+
     def test_parens_in_expressions(self):
         self.assertEqual(f'{3,}', '(3,)')
 
diff --git a/Python/ast.c b/Python/ast.c
index 205c711..7551b6f 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -4274,49 +4274,32 @@ fstring_compile_expr(const char *expr_start, const char *expr_end,
                      struct compiling *c, const node *n)
 
 {
-    int all_whitespace = 1;
-    int kind;
-    void *data;
     PyCompilerFlags cf;
     mod_ty mod;
     char *str;
-    PyObject *o;
     Py_ssize_t len;
-    Py_ssize_t i;
+    const char *s;
 
     assert(expr_end >= expr_start);
     assert(*(expr_start-1) == '{');
     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
 
-    /* We know there are no escapes here, because backslashes are not allowed,
-       and we know it's utf-8 encoded (per PEP 263).  But, in order to check
-       that each char is not whitespace, we need to decode it to unicode.
-       Which is unfortunate, but such is life. */
-
     /* If the substring is all whitespace, it's an error.  We need to catch
        this here, and not when we call PyParser_ASTFromString, because turning
        the expression '' in to '()' would go from being invalid to valid. */
-    /* Note that this code says an empty string is all whitespace.  That's
-       important.  There's a test for it: f'{}'. */
-    o = PyUnicode_DecodeUTF8(expr_start, expr_end-expr_start, NULL);
-    if (o == NULL)
-        return NULL;
-    len = PyUnicode_GET_LENGTH(o);
-    kind = PyUnicode_KIND(o);
-    data = PyUnicode_DATA(o);
-    for (i = 0; i < len; i++) {
-        if (!Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
-            all_whitespace = 0;
+    for (s = expr_start; s != expr_end; s++) {
+        char c = *s;
+        /* The Python parser ignores only the following whitespace
+           characters (\r already is converted to \n). */
+        if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
             break;
         }
     }
-    Py_DECREF(o);
-    if (all_whitespace) {
+    if (s == expr_end) {
         ast_error(c, n, "f-string: empty expression not allowed");
         return NULL;
     }
 
-    /* Reuse len to be the length of the utf-8 input string. */
     len = expr_end - expr_start;
     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
     str = PyMem_RawMalloc(len + 3);
author	Serhiy Storchaka <storchaka@gmail.com>	2017-06-08 20:43:54 (GMT)
committer	GitHub <noreply@github.com>	2017-06-08 20:43:54 (GMT)
commit	2e9cd5825c5ccdbb6f65a57c0c7941078e003c14 (patch)
tree	1f39984ce8de33ad6f1da69e34e5e09b9161823c
parent	29adc13bd797d9c9e7fcb893a7c49ce7f7ad388c (diff)
download	cpython-2e9cd5825c5ccdbb6f65a57c0c7941078e003c14.zip cpython-2e9cd5825c5ccdbb6f65a57c0c7941078e003c14.tar.gz cpython-2e9cd5825c5ccdbb6f65a57c0c7941078e003c14.tar.bz2