diff options
author | Marta Gómez Macías <mgmacias@google.com> | 2023-05-22 00:29:04 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-22 00:29:04 (GMT) |
commit | 8817886ae571f5b5ce4e2e6cfd2458622d0efac1 (patch) | |
tree | 261c32bcaf57a4f76a5c4fac5bd856803d636da8 /Python | |
parent | 4b107d86f38f6778562d4fe5e1d881b52c9d9d6c (diff) | |
download | cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.zip cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.tar.gz cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.tar.bz2 |
gh-102856: Tokenize performance improvement (#104731)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/Python-tokenize.c | 17 |
1 file changed, 16 insertions, 1 deletion
```diff
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index ece2386..43b44be 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -207,7 +207,22 @@ tokenizeriter_next(tokenizeriterobject *it)
         end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start);
     }
 
-    result = Py_BuildValue("(NinnnnN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
+    if (it->tok->tok_extra_tokens) {
+        // Necessary adjustments to match the original Python tokenize
+        // implementation
+        if (type > DEDENT && type < OP) {
+            type = OP;
+        }
+        else if (type == ASYNC || type == AWAIT) {
+            type = NAME;
+        }
+        else if (type == NEWLINE) {
+            str = PyUnicode_FromString("\n");
+            end_col_offset++;
+        }
+    }
+
+    result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
     return result;
```