diff options
Diffstat (limited to 'Parser')
-rw-r--r-- Parser/parsetok.c  | 15
-rw-r--r-- Parser/tokenizer.c |  7
-rw-r--r-- Parser/tokenizer.h |  5
3 files changed, 24 insertions, 3 deletions
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index fc878d8..d37e28a 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -205,6 +205,8 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         size_t len;
         char *str;
         col_offset = -1;
+        int lineno;
+        const char *line_start;
 
         type = PyTokenizer_Get(tok, &a, &b);
         if (type == ERRORTOKEN) {
@@ -253,8 +255,15 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
             }
         }
 #endif
-        if (a != NULL && a >= tok->line_start) {
-            col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
+
+        /* Nodes of type STRING, especially multi line strings
+           must be handled differently in order to get both
+           the starting line number and the column offset right.
+           (cf. issue 16806) */
+        lineno = type == STRING ? tok->first_lineno : tok->lineno;
+        line_start = type == STRING ? tok->multi_line_start : tok->line_start;
+        if (a != NULL && a >= line_start) {
+            col_offset = Py_SAFE_DOWNCAST(a - line_start,
                                           intptr_t, int);
         }
         else {
@@ -263,7 +272,7 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
 
         if ((err_ret->error =
              PyParser_AddToken(ps, (int)type, str,
-                               tok->lineno, col_offset,
+                               lineno, col_offset,
                                &(err_ret->expected))) != E_OK) {
             if (err_ret->error != E_DONE) {
                 PyObject_FREE(str);
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 0e6c1a8..3e3cf2c 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1519,6 +1519,13 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
         int quote_size = 1; /* 1 or 3 */
         int end_quote_size = 0;
 
+        /* Nodes of type STRING, especially multi line strings
+           must be handled differently in order to get both
+           the starting line number and the column offset right.
+           (cf. issue 16806) */
+        tok->first_lineno = tok->lineno;
+        tok->multi_line_start = tok->line_start;
+
         /* Find the quote size and start of string */
         c = tok_nextc(tok);
         if (c == quote) {
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index cd18d25..096ce68 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -38,6 +38,8 @@ struct tok_state {
     int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
     const char *prompt, *nextprompt; /* For interactive prompting */
     int lineno; /* Current line number */
+    int first_lineno; /* First line of a single line or multi line string
+                         expression (cf. issue 16806) */
     int level; /* () [] {} Parentheses nesting level */
     /* Used to allow free continuations inside them */
 #ifndef PGEN
@@ -58,6 +60,9 @@ struct tok_state {
     char *encoding; /* Source encoding. */
     int cont_line; /* whether we are in a continuation line. */
     const char* line_start; /* pointer to start of current line */
+    const char* multi_line_start; /* pointer to start of first line of
+                                     a single line or multi line string
+                                     expression (cf. issue 16806) */
 #ifndef PGEN
     PyObject *decoding_readline; /* open(...).readline */
     PyObject *decoding_buffer;