summary | refs | log | tree | commit | diff | stats
path: root/Parser/tokenizer.c
diff options
context:
space:
mode:
author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2022-01-25 22:12:14 (GMT)
committer: GitHub <noreply@github.com> 2022-01-25 22:12:14 (GMT)
commit: a0efc0c1960e2c49e0092694d98395555270914c (patch)
tree: beee479f9d645494a6a1d216d7f48932010d67a9 /Parser/tokenizer.c
parent: b1cb8430504931f7854eac5d32cba74770078a4e (diff)
download: cpython-a0efc0c1960e2c49e0092694d98395555270914c.zip
cpython-a0efc0c1960e2c49e0092694d98395555270914c.tar.gz
cpython-a0efc0c1960e2c49e0092694d98395555270914c.tar.bz2
bpo-46091: Correctly calculate indentation levels for whitespace lines with continuation characters (GH-30130)
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- Parser/tokenizer.c 46
1 files changed, 33 insertions, 13 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 5e35d6f..cd4254f 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1347,6 +1347,24 @@ tok_decimal_tail(struct tok_state *tok)
/* Get next token, after space stripping etc. */
+static inline int
+tok_continuation_line(struct tok_state *tok) { /* Finish a line continuation.
+ * Both call sites in this diff invoke this right after reading a '\\'.
+ * On failure it sets tok->done and returns -1 (the callers then return
+ * ERRORTOKEN). On success it returns the character that follows the
+ * newline, after passing that character to tok_backup(). */
+ int c = tok_nextc(tok);
+ if (c != '\n') { /* anything other than a newline after the backslash is an error */
+ tok->done = E_LINECONT;
+ return -1;
+ }
+ c = tok_nextc(tok); /* look at the first character after the newline */
+ if (c == EOF) { /* input ended immediately after the continuation */
+ tok->done = E_EOF;
+ tok->cur = tok->inp; /* NOTE(review): presumably marks all buffered input as consumed -- confirm */
+ return -1;
+ } else {
+ tok_backup(tok, c); /* presumably un-reads c so the caller's next tok_nextc() sees it -- confirm */
+ }
+ return c;
+}
+
static int
tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
{
@@ -1363,6 +1381,7 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
int col = 0;
int altcol = 0;
tok->atbol = 0;
+ int cont_line_col = 0;
for (;;) {
c = tok_nextc(tok);
if (c == ' ') {
@@ -1375,14 +1394,23 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
else if (c == '\014') {/* Control-L (formfeed) */
col = altcol = 0; /* For Emacs users */
}
+ else if (c == '\\') {
+ // Indentation cannot be split over multiple physical lines
+ // using backslashes. This means that if we found a backslash
+ // preceded by whitespace, **the first one we find** determines
+ // the level of indentation of whatever comes next.
+ cont_line_col = cont_line_col ? cont_line_col : col;
+ if ((c = tok_continuation_line(tok)) == -1) {
+ return ERRORTOKEN;
+ }
+ }
else {
break;
}
}
tok_backup(tok, c);
- if (c == '#' || c == '\n' || c == '\\') {
+ if (c == '#' || c == '\n') {
/* Lines with only whitespace and/or comments
- and/or a line continuation character
shouldn't affect the indentation and are
not passed to the parser as NEWLINE tokens,
except *totally* empty lines in interactive
@@ -1403,6 +1431,8 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
may need to skip to the end of a comment */
}
if (!blankline && tok->level == 0) {
+ col = cont_line_col ? cont_line_col : col;
+ altcol = cont_line_col ? cont_line_col : altcol;
if (col == tok->indstack[tok->indent]) {
/* No change */
if (altcol != tok->altindstack[tok->indent]) {
@@ -1964,19 +1994,9 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
/* Line continuation */
if (c == '\\') {
- c = tok_nextc(tok);
- if (c != '\n') {
- tok->done = E_LINECONT;
+ if ((c = tok_continuation_line(tok)) == -1) {
return ERRORTOKEN;
}
- c = tok_nextc(tok);
- if (c == EOF) {
- tok->done = E_EOF;
- tok->cur = tok->inp;
- return ERRORTOKEN;
- } else {
- tok_backup(tok, c);
- }
tok->cont_line = 1;
goto again; /* Read next line */
}