diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2022-01-25 22:12:14 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-25 22:12:14 (GMT) |
commit | a0efc0c1960e2c49e0092694d98395555270914c (patch) | |
tree | beee479f9d645494a6a1d216d7f48932010d67a9 /Parser/tokenizer.c | |
parent | b1cb8430504931f7854eac5d32cba74770078a4e (diff) | |
download | cpython-a0efc0c1960e2c49e0092694d98395555270914c.zip cpython-a0efc0c1960e2c49e0092694d98395555270914c.tar.gz cpython-a0efc0c1960e2c49e0092694d98395555270914c.tar.bz2 |
bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130)
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 46 |
1 files changed, 33 insertions, 13 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 5e35d6f..cd4254f 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -1347,6 +1347,24 @@ tok_decimal_tail(struct tok_state *tok) /* Get next token, after space stripping etc. */ +static inline int +tok_continuation_line(struct tok_state *tok) { + int c = tok_nextc(tok); + if (c != '\n') { + tok->done = E_LINECONT; + return -1; + } + c = tok_nextc(tok); + if (c == EOF) { + tok->done = E_EOF; + tok->cur = tok->inp; + return -1; + } else { + tok_backup(tok, c); + } + return c; +} + static int tok_get(struct tok_state *tok, const char **p_start, const char **p_end) { @@ -1363,6 +1381,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) int col = 0; int altcol = 0; tok->atbol = 0; + int cont_line_col = 0; for (;;) { c = tok_nextc(tok); if (c == ' ') { @@ -1375,14 +1394,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) else if (c == '\014') {/* Control-L (formfeed) */ col = altcol = 0; /* For Emacs users */ } + else if (c == '\\') { + // Indentation cannot be split over multiple physical lines + // using backslashes. This means that if we found a backslash + // preceded by whitespace, **the first one we find** determines + // the level of indentation of whatever comes next. + cont_line_col = cont_line_col ? cont_line_col : col; + if ((c = tok_continuation_line(tok)) == -1) { + return ERRORTOKEN; + } + } else { break; } } tok_backup(tok, c); - if (c == '#' || c == '\n' || c == '\\') { + if (c == '#' || c == '\n') { /* Lines with only whitespace and/or comments - and/or a line continuation character shouldn't affect the indentation and are not passed to the parser as NEWLINE tokens, except *totally* empty lines in interactive @@ -1403,6 +1431,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) may need to skip to the end of a comment */ } if (!blankline && tok->level == 0) { + col = cont_line_col ? cont_line_col : col; + altcol = cont_line_col ? cont_line_col : altcol; if (col == tok->indstack[tok->indent]) { /* No change */ if (altcol != tok->altindstack[tok->indent]) { @@ -1964,19 +1994,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) /* Line continuation */ if (c == '\\') { - c = tok_nextc(tok); - if (c != '\n') { - tok->done = E_LINECONT; + if ((c = tok_continuation_line(tok)) == -1) { return ERRORTOKEN; } - c = tok_nextc(tok); - if (c == EOF) { - tok->done = E_EOF; - tok->cur = tok->inp; - return ERRORTOKEN; - } else { - tok_backup(tok, c); - } tok->cont_line = 1; goto again; /* Read next line */ } |