summaryrefslogtreecommitdiffstats
path: root/Python/Python-tokenize.c
diff options
context:
space:
mode:
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2023-05-26 22:14:45 (GMT)
committerGitHub <noreply@github.com>2023-05-26 22:14:45 (GMT)
commit2c02c6886739f0ed420d900b2a29933bc1c5df37 (patch)
tree47e9a870570009cdaafb46d23bc5977d3e6a682a /Python/Python-tokenize.c
parent05189f3054e3a831967a1bb53d14d97c97e31598 (diff)
downloadcpython-2c02c6886739f0ed420d900b2a29933bc1c5df37.zip
cpython-2c02c6886739f0ed420d900b2a29933bc1c5df37.tar.gz
cpython-2c02c6886739f0ed420d900b2a29933bc1c5df37.tar.bz2
[3.12] gh-104976: Ensure trailing dedent tokens are emitted as the previous tokenizer (GH-104980) (#105000)
Diffstat (limited to 'Python/Python-tokenize.c')
-rw-r--r--Python/Python-tokenize.c26
1 file changed, 23 insertions, 3 deletions
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 88087c1..01c2215 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -30,6 +30,7 @@ class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_t
typedef struct
{
PyObject_HEAD struct tok_state *tok;
+ int done;
} tokenizeriterobject;
/*[clinic input]
@@ -63,6 +64,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
if (extra_tokens) {
self->tok->tok_extra_tokens = 1;
}
+ self->done = 0;
return (PyObject *)self;
}
@@ -179,8 +181,9 @@ tokenizeriter_next(tokenizeriterobject *it)
}
goto exit;
}
- if (type == ERRORTOKEN || type == ENDMARKER) {
+ if (it->done || type == ERRORTOKEN) {
PyErr_SetString(PyExc_StopIteration, "EOF");
+ it->done = 1;
goto exit;
}
PyObject *str = NULL;
@@ -194,9 +197,19 @@ tokenizeriter_next(tokenizeriterobject *it)
goto exit;
}
+ int is_trailing_token = 0;
+ if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
+ is_trailing_token = 1;
+ }
+
const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
- Py_ssize_t size = it->tok->inp - line_start;
- PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+ PyObject* line = NULL;
+ if (it->tok->tok_extra_tokens && is_trailing_token) {
+ line = PyUnicode_FromString("");
+ } else {
+ Py_ssize_t size = it->tok->inp - line_start;
+ line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+ }
if (line == NULL) {
Py_DECREF(str);
goto exit;
@@ -214,6 +227,10 @@ tokenizeriter_next(tokenizeriterobject *it)
}
if (it->tok->tok_extra_tokens) {
+ if (is_trailing_token) {
+ lineno = end_lineno = lineno + 1;
+ col_offset = end_col_offset = 0;
+ }
// Necessary adjustments to match the original Python tokenize
// implementation
if (type > DEDENT && type < OP) {
@@ -231,6 +248,9 @@ tokenizeriter_next(tokenizeriterobject *it)
result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
exit:
_PyToken_Free(&token);
+ if (type == ENDMARKER) {
+ it->done = 1;
+ }
return result;
}