diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2023-05-26 21:02:26 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-26 21:02:26 (GMT) |
commit | 46b52e6e2bda51d0b89a64ee36ce2d305a7409f3 (patch) | |
tree | 29e431021c874093e6964b4d0ed8f6ba4d096ee6 /Python/Python-tokenize.c | |
parent | 402ee5a68b306b489b782478ab96e8e3b913587a (diff) | |
download | cpython-46b52e6e2bda51d0b89a64ee36ce2d305a7409f3.zip cpython-46b52e6e2bda51d0b89a64ee36ce2d305a7409f3.tar.gz cpython-46b52e6e2bda51d0b89a64ee36ce2d305a7409f3.tar.bz2 |
gh-104976: Ensure trailing dedent tokens are emitted as the previous tokenizer (#104980)
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
Diffstat (limited to 'Python/Python-tokenize.c')
-rw-r--r-- | Python/Python-tokenize.c | 26 |
1 file changed, 23 insertions(+), 3 deletions(-)
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 88087c1..01c2215 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -30,6 +30,7 @@ class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_t
 typedef struct
 {
     PyObject_HEAD struct tok_state *tok;
+    int done;
 } tokenizeriterobject;

 /*[clinic input]
@@ -63,6 +64,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
     if (extra_tokens) {
         self->tok->tok_extra_tokens = 1;
     }
+    self->done = 0;
     return (PyObject *)self;
 }

@@ -179,8 +181,9 @@ tokenizeriter_next(tokenizeriterobject *it)
         }
         goto exit;
     }
-    if (type == ERRORTOKEN || type == ENDMARKER) {
+    if (it->done || type == ERRORTOKEN) {
         PyErr_SetString(PyExc_StopIteration, "EOF");
+        it->done = 1;
         goto exit;
     }
     PyObject *str = NULL;
@@ -194,9 +197,19 @@ tokenizeriter_next(tokenizeriterobject *it)
         goto exit;
     }

+    int is_trailing_token = 0;
+    if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
+        is_trailing_token = 1;
+    }
+
     const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
-    Py_ssize_t size = it->tok->inp - line_start;
-    PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    PyObject* line = NULL;
+    if (it->tok->tok_extra_tokens && is_trailing_token) {
+        line = PyUnicode_FromString("");
+    } else {
+        Py_ssize_t size = it->tok->inp - line_start;
+        line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    }
     if (line == NULL) {
         Py_DECREF(str);
         goto exit;
@@ -214,6 +227,10 @@ tokenizeriter_next(tokenizeriterobject *it)
     }

     if (it->tok->tok_extra_tokens) {
+        if (is_trailing_token) {
+            lineno = end_lineno = lineno + 1;
+            col_offset = end_col_offset = 0;
+        }
         // Necessary adjustments to match the original Python tokenize
         // implementation
         if (type > DEDENT && type < OP) {
@@ -231,6 +248,9 @@ tokenizeriter_next(tokenizeriterobject *it)
     result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
+    if (type == ENDMARKER) {
+        it->done = 1;
+    }
     return result;
 }