summaryrefslogtreecommitdiffstats
path: root/Python/Python-tokenize.c
diff options
context:
space:
mode:
authorPablo Galindo Salgado <Pablogsal@gmail.com>2023-05-26 21:02:26 (GMT)
committerGitHub <noreply@github.com>2023-05-26 21:02:26 (GMT)
commit46b52e6e2bda51d0b89a64ee36ce2d305a7409f3 (patch)
tree29e431021c874093e6964b4d0ed8f6ba4d096ee6 /Python/Python-tokenize.c
parent402ee5a68b306b489b782478ab96e8e3b913587a (diff)
downloadcpython-46b52e6e2bda51d0b89a64ee36ce2d305a7409f3.zip
cpython-46b52e6e2bda51d0b89a64ee36ce2d305a7409f3.tar.gz
cpython-46b52e6e2bda51d0b89a64ee36ce2d305a7409f3.tar.bz2
gh-104976: Ensure trailing dedent tokens are emitted as in the previous tokenizer (#104980)
Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
Diffstat (limited to 'Python/Python-tokenize.c')
-rw-r--r--Python/Python-tokenize.c26
1 file changed, 23 insertions, 3 deletions
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index 88087c1..01c2215 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -30,6 +30,7 @@ class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_t
typedef struct
{
PyObject_HEAD struct tok_state *tok;
+ int done;
} tokenizeriterobject;
/*[clinic input]
@@ -63,6 +64,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
if (extra_tokens) {
self->tok->tok_extra_tokens = 1;
}
+ self->done = 0;
return (PyObject *)self;
}
@@ -179,8 +181,9 @@ tokenizeriter_next(tokenizeriterobject *it)
}
goto exit;
}
- if (type == ERRORTOKEN || type == ENDMARKER) {
+ if (it->done || type == ERRORTOKEN) {
PyErr_SetString(PyExc_StopIteration, "EOF");
+ it->done = 1;
goto exit;
}
PyObject *str = NULL;
@@ -194,9 +197,19 @@ tokenizeriter_next(tokenizeriterobject *it)
goto exit;
}
+ int is_trailing_token = 0;
+ if (type == ENDMARKER || (type == DEDENT && it->tok->done == E_EOF)) {
+ is_trailing_token = 1;
+ }
+
const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
- Py_ssize_t size = it->tok->inp - line_start;
- PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+ PyObject* line = NULL;
+ if (it->tok->tok_extra_tokens && is_trailing_token) {
+ line = PyUnicode_FromString("");
+ } else {
+ Py_ssize_t size = it->tok->inp - line_start;
+ line = PyUnicode_DecodeUTF8(line_start, size, "replace");
+ }
if (line == NULL) {
Py_DECREF(str);
goto exit;
@@ -214,6 +227,10 @@ tokenizeriter_next(tokenizeriterobject *it)
}
if (it->tok->tok_extra_tokens) {
+ if (is_trailing_token) {
+ lineno = end_lineno = lineno + 1;
+ col_offset = end_col_offset = 0;
+ }
// Necessary adjustments to match the original Python tokenize
// implementation
if (type > DEDENT && type < OP) {
@@ -231,6 +248,9 @@ tokenizeriter_next(tokenizeriterobject *it)
result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
exit:
_PyToken_Free(&token);
+ if (type == ENDMARKER) {
+ it->done = 1;
+ }
return result;
}