[3.10] bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130). (GH-30898)

(cherry picked from commit a0efc0c1960e2c49e0092694d98395555270914c) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2022-01-25 22:33:57 (GMT)
committer: GitHub <noreply@github.com> 2022-01-25 22:33:57 (GMT)
commit: 3fc8b74ace033a17346a992f661928ba619e61e8 (patch)
tree: 9e12b09912f60fbfb57952ab44d11c2ae7320afd
parent: 4a57fa296b92125e41220ecd201eb2e432b79fb0 (diff)
download: cpython-3fc8b74ace033a17346a992f661928ba619e61e8.zip
cpython-3fc8b74ace033a17346a992f661928ba619e61e8.tar.gz
cpython-3fc8b74ace033a17346a992f661928ba619e61e8.tar.bz2
5 files changed, 67 insertions, 16 deletions
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index 39fc7e9..95af9e2 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -1045,8 +1045,7 @@ Module(
             ast.literal_eval(node)
 
     def test_literal_eval_syntax_errors(self):
-        msg = "unexpected character after line continuation character"
-        with self.assertRaisesRegex(SyntaxError, msg):
+        with self.assertRaisesRegex(SyntaxError, "unexpected indent"):
             ast.literal_eval(r'''
                 \
                 (\
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index 7aa93a0..ac5a41c 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -1463,6 +1463,36 @@ pass
         except SyntaxError:
             self.fail("Empty line after a line continuation character is valid.")
 
+        # See issue-46091
+        s1 = r"""\
+def fib(n):
+    \
+'''Print a Fibonacci series up to n.'''
+    \
+a, b = 0, 1
+"""
+        s2 = r"""\
+def fib(n):
+    '''Print a Fibonacci series up to n.'''
+    a, b = 0, 1
+"""
+        try:
+            self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec'))
+        except SyntaxError:
+            self.fail("Indented statement over multiple lines is valid")
+    
+    def test_continuation_bad_indentation(self): 
+        # Check that code that breaks indentation across multiple lines raises a syntax error
+
+        code = r"""\
+if x:
+    y = 1
+  \
+  foo = 1
+        """
+
+        self.assertRaises(IndentationError, exec, code)
+
     @support.cpython_only
     def test_nested_named_except_blocks(self):
         code = ""
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 4bce1ca..127f0a1 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -6,6 +6,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      NEWLINE)
 from io import BytesIO, StringIO
 import unittest
+from textwrap import dedent
 from unittest import TestCase, mock
 from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                                INVALID_UNDERSCORE_LITERALS)
@@ -45,7 +46,6 @@ class TokenizeTest(TestCase):
         # The ENDMARKER and final NEWLINE are omitted.
         f = BytesIO(s.encode('utf-8'))
         result = stringify_tokens_from_source(tokenize(f.readline), s)
-
         self.assertEqual(result,
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst
new file mode 100644
index 0000000..a2eee0f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst
@@ -0,0 +1,2 @@
+Correctly calculate indentation levels for lines with whitespace characters
+that are ended by line continuation characters. Patch by Pablo Galindo.
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 8e9c69d..de5f576 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1346,6 +1346,24 @@ tok_decimal_tail(struct tok_state *tok)
 
 /* Get next token, after space stripping etc. */
 
+static inline int
+tok_continuation_line(struct tok_state *tok) {
+    int c = tok_nextc(tok);
+    if (c != '\n') {
+        tok->done = E_LINECONT;
+        return -1;
+    }
+    c = tok_nextc(tok);
+    if (c == EOF) {
+        tok->done = E_EOF;
+        tok->cur = tok->inp;
+        return -1;
+    } else {
+        tok_backup(tok, c);
+    }
+    return c;
+}
+
 static int
 tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
 {
@@ -1362,6 +1380,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
         int col = 0;
         int altcol = 0;
         tok->atbol = 0;
+        int cont_line_col = 0;
         for (;;) {
             c = tok_nextc(tok);
             if (c == ' ') {
@@ -1374,14 +1393,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
             else if (c == '\014')  {/* Control-L (formfeed) */
                 col = altcol = 0; /* For Emacs users */
             }
+            else if (c == '\\') {
+                // Indentation cannot be split over multiple physical lines
+                // using backslashes. This means that if we found a backslash
+                // preceded by whitespace, **the first one we find** determines
+                // the level of indentation of whatever comes next.
+                cont_line_col = cont_line_col ? cont_line_col : col;
+                if ((c = tok_continuation_line(tok)) == -1) {
+                    return ERRORTOKEN;
+                }
+            }
             else {
                 break;
             }
         }
         tok_backup(tok, c);
-        if (c == '#' || c == '\n' || c == '\\') {
+        if (c == '#' || c == '\n') {
             /* Lines with only whitespace and/or comments
-               and/or a line continuation character
                shouldn't affect the indentation and are
                not passed to the parser as NEWLINE tokens,
                except *totally* empty lines in interactive
@@ -1402,6 +1430,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
                may need to skip to the end of a comment */
         }
         if (!blankline && tok->level == 0) {
+            col = cont_line_col ? cont_line_col : col;
+            altcol = cont_line_col ? cont_line_col : altcol;
             if (col == tok->indstack[tok->indent]) {
                 /* No change */
                 if (altcol != tok->altindstack[tok->indent]) {
@@ -1963,19 +1993,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
 
     /* Line continuation */
     if (c == '\\') {
-        c = tok_nextc(tok);
-        if (c != '\n') {
-            tok->done = E_LINECONT;
+        if ((c = tok_continuation_line(tok)) == -1) {
             return ERRORTOKEN;
         }
-        c = tok_nextc(tok);
-        if (c == EOF) {
-            tok->done = E_EOF;
-            tok->cur = tok->inp;
-            return ERRORTOKEN;
-        } else {
-            tok_backup(tok, c);
-        }
         tok->cont_line = 1;
         goto again; /* Read next line */
     }
author	Pablo Galindo Salgado <Pablogsal@gmail.com>	2022-01-25 22:33:57 (GMT)
committer	GitHub <noreply@github.com>	2022-01-25 22:33:57 (GMT)
commit	3fc8b74ace033a17346a992f661928ba619e61e8 (patch)
tree	9e12b09912f60fbfb57952ab44d11c2ae7320afd
parent	4a57fa296b92125e41220ecd201eb2e432b79fb0 (diff)
download	cpython-3fc8b74ace033a17346a992f661928ba619e61e8.zip cpython-3fc8b74ace033a17346a992f661928ba619e61e8.tar.gz cpython-3fc8b74ace033a17346a992f661928ba619e61e8.tar.bz2