diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2022-01-25 22:33:57 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-25 22:33:57 (GMT) |
commit | 3fc8b74ace033a17346a992f661928ba619e61e8 (patch) | |
tree | 9e12b09912f60fbfb57952ab44d11c2ae7320afd | |
parent | 4a57fa296b92125e41220ecd201eb2e432b79fb0 (diff) | |
download | cpython-3fc8b74ace033a17346a992f661928ba619e61e8.zip cpython-3fc8b74ace033a17346a992f661928ba619e61e8.tar.gz cpython-3fc8b74ace033a17346a992f661928ba619e61e8.tar.bz2 |
[3.10] bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130). (GH-30898)
(cherry picked from commit a0efc0c1960e2c49e0092694d98395555270914c)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
-rw-r--r-- | Lib/test/test_ast.py | 3 |
-rw-r--r-- | Lib/test/test_syntax.py | 30 |
-rw-r--r-- | Lib/test/test_tokenize.py | 2 |
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst | 2 |
-rw-r--r-- | Parser/tokenizer.c | 46 |
5 files changed, 67 insertions, 16 deletions
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 39fc7e9..95af9e2 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -1045,8 +1045,7 @@ Module( ast.literal_eval(node) def test_literal_eval_syntax_errors(self): - msg = "unexpected character after line continuation character" - with self.assertRaisesRegex(SyntaxError, msg): + with self.assertRaisesRegex(SyntaxError, "unexpected indent"): ast.literal_eval(r''' \ (\ diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index 7aa93a0..ac5a41c 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1463,6 +1463,36 @@ pass except SyntaxError: self.fail("Empty line after a line continuation character is valid.") + # See issue-46091 + s1 = r"""\ +def fib(n): + \ +'''Print a Fibonacci series up to n.''' + \ +a, b = 0, 1 +""" + s2 = r"""\ +def fib(n): + '''Print a Fibonacci series up to n.''' + a, b = 0, 1 +""" + try: + self.assertEqual(compile(s1, '<string>', 'exec'), compile(s2, '<string>', 'exec')) + except SyntaxError: + self.fail("Indented statement over multiple lines is valid") + + def test_continuation_bad_indentation(self): + # Check that code that breaks indentation across multiple lines raises a syntax error + + code = r"""\ +if x: + y = 1 + \ + foo = 1 + """ + + self.assertRaises(IndentationError, exec, code) + @support.cpython_only def test_nested_named_except_blocks(self): code = "" diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4bce1ca..127f0a1 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -6,6 +6,7 @@ from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP, NEWLINE) from io import BytesIO, StringIO import unittest +from textwrap import dedent from unittest import TestCase, mock from test.test_grammar import (VALID_UNDERSCORE_LITERALS, INVALID_UNDERSCORE_LITERALS) @@ -45,7 +46,6 @@ class TokenizeTest(TestCase): # The ENDMARKER and final NEWLINE are omitted. 
f = BytesIO(s.encode('utf-8')) result = stringify_tokens_from_source(tokenize(f.readline), s) - self.assertEqual(result, [" ENCODING 'utf-8' (0, 0) (0, 0)"] + expected.rstrip().splitlines()) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst new file mode 100644 index 0000000..a2eee0f --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-00-24-00.bpo-46091.rJ_e_e.rst @@ -0,0 +1,2 @@ +Correctly calculate indentation levels for lines with whitespace character +that are ended by line continuation characters. Patch by Pablo Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 8e9c69d..de5f576 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1346,6 +1346,24 @@ tok_decimal_tail(struct tok_state *tok) /* Get next token, after space stripping etc. */ +static inline int +tok_continuation_line(struct tok_state *tok) { + int c = tok_nextc(tok); + if (c != '\n') { + tok->done = E_LINECONT; + return -1; + } + c = tok_nextc(tok); + if (c == EOF) { + tok->done = E_EOF; + tok->cur = tok->inp; + return -1; + } else { + tok_backup(tok, c); + } + return c; +} + static int tok_get(struct tok_state *tok, const char **p_start, const char **p_end) { @@ -1362,6 +1380,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) int col = 0; int altcol = 0; tok->atbol = 0; + int cont_line_col = 0; for (;;) { c = tok_nextc(tok); if (c == ' ') { @@ -1374,14 +1393,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) else if (c == '\014') {/* Control-L (formfeed) */ col = altcol = 0; /* For Emacs users */ } + else if (c == '\\') { + // Indentation cannot be split over multiple physical lines + // using backslashes. This means that if we found a backslash + // preceded by whitespace, **the first one we find** determines + // the level of indentation of whatever comes next. 
+ cont_line_col = cont_line_col ? cont_line_col : col; + if ((c = tok_continuation_line(tok)) == -1) { + return ERRORTOKEN; + } + } else { break; } } tok_backup(tok, c); - if (c == '#' || c == '\n' || c == '\\') { + if (c == '#' || c == '\n') { /* Lines with only whitespace and/or comments - and/or a line continuation character shouldn't affect the indentation and are not passed to the parser as NEWLINE tokens, except *totally* empty lines in interactive @@ -1402,6 +1430,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) may need to skip to the end of a comment */ } if (!blankline && tok->level == 0) { + col = cont_line_col ? cont_line_col : col; + altcol = cont_line_col ? cont_line_col : altcol; if (col == tok->indstack[tok->indent]) { /* No change */ if (altcol != tok->altindstack[tok->indent]) { @@ -1963,19 +1993,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Line continuation */ if (c == '\\') { - c = tok_nextc(tok); - if (c != '\n') { - tok->done = E_LINECONT; + if ((c = tok_continuation_line(tok)) == -1) { return ERRORTOKEN; } - c = tok_nextc(tok); - if (c == EOF) { - tok->done = E_EOF; - tok->cur = tok->inp; - return ERRORTOKEN; - } else { - tok_backup(tok, c); - } tok->cont_line = 1; goto again; /* Read next line */ } |