diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2023-05-26 22:14:45 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-26 22:14:45 (GMT) |
commit | 2c02c6886739f0ed420d900b2a29933bc1c5df37 (patch) | |
tree | 47e9a870570009cdaafb46d23bc5977d3e6a682a /Python/Python-tokenize.c | |
parent | 05189f3054e3a831967a1bb53d14d97c97e31598 (diff) | |
download | cpython-2c02c6886739f0ed420d900b2a29933bc1c5df37.zip cpython-2c02c6886739f0ed420d900b2a29933bc1c5df37.tar.gz cpython-2c02c6886739f0ed420d900b2a29933bc1c5df37.tar.bz2 |
[3.12] gh-104976: Ensure trailing dedent tokens are emitted as the previous tokenizer (GH-104980) (#105000)
Diffstat (limited to 'Python/Python-tokenize.c')
-rw-r--r-- | Python/Python-tokenize.c | 26 |
1 files changed, 23 insertions, 3 deletions
```diff
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 88087c1..01c2215 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -30,6 +30,7 @@ class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_t
 typedef struct
 {
     PyObject_HEAD struct tok_state *tok;
+    int done;
 } tokenizeriterobject;
 
 /*[clinic input]
@@ -63,6 +64,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
     if (extra_tokens) {
         self->tok->tok_extra_tokens = 1;
     }
+    self->done = 0;
     return (PyObject *)self;
 }
 
@@ -179,8 +181,9 @@ tokenizeriter_next(tokenizeriterobject *it)
         }
         goto exit;
     }
-    if (type == ERRORTOKEN || type == ENDMARKER) {
+    if (it->done || type == ERRORTOKEN) {
         PyErr_SetString(PyExc_StopIteration, "EOF");
+        it->done = 1;
         goto exit;
     }
     PyObject *str = NULL;
@@ -194,9 +197,19 @@ tokenizeriter_next(tokenizeriterobject *it)
         goto exit;
     }
 
+    int is_trailing_token = 0;
+    if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
+        is_trailing_token = 1;
+    }
+
     const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
-    Py_ssize_t size = it->tok->inp - line_start;
-    PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    PyObject* line = NULL;
+    if (it->tok->tok_extra_tokens && is_trailing_token) {
+        line = PyUnicode_FromString("");
+    } else {
+        Py_ssize_t size = it->tok->inp - line_start;
+        line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+    }
     if (line == NULL) {
         Py_DECREF(str);
         goto exit;
@@ -214,6 +227,10 @@ tokenizeriter_next(tokenizeriterobject *it)
     }
 
     if (it->tok->tok_extra_tokens) {
+        if (is_trailing_token) {
+            lineno = end_lineno = lineno + 1;
+            col_offset = end_col_offset = 0;
+        }
         // Necessary adjustments to match the original Python tokenize
         // implementation
         if (type > DEDENT && type < OP) {
@@ -231,6 +248,9 @@ tokenizeriter_next(tokenizeriterobject *it)
     result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
+    if (type == ENDMARKER) {
+        it->done = 1;
+    }
     return result;
 }
 
```