author     Marta Gómez Macías <mgmacias@google.com>   2023-05-22 00:29:04 (GMT)
committer  GitHub <noreply@github.com>                2023-05-22 00:29:04 (GMT)
commit     8817886ae571f5b5ce4e2e6cfd2458622d0efac1 (patch)
tree       261c32bcaf57a4f76a5c4fac5bd856803d636da8 /Python
parent     4b107d86f38f6778562d4fe5e1d881b52c9d9d6c (diff)
gh-102856: Tokenize performance improvement (#104731)
Diffstat (limited to 'Python')
-rw-r--r--  Python/Python-tokenize.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index ece2386..43b44be 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -207,7 +207,22 @@ tokenizeriter_next(tokenizeriterobject *it)
         end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start);
     }
 
-    result = Py_BuildValue("(NinnnnN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
+    if (it->tok->tok_extra_tokens) {
+        // Necessary adjustments to match the original Python tokenize
+        // implementation
+        if (type > DEDENT && type < OP) {
+            type = OP;
+        }
+        else if (type == ASYNC || type == AWAIT) {
+            type = NAME;
+        }
+        else if (type == NEWLINE) {
+            str = PyUnicode_FromString("\n");
+            end_col_offset++;
+        }
+    }
+
+    result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
     return result;
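
Note (not part of the commit): the new Py_BuildValue format string "(iN(nn)(nn)N)" packs the token fields in the same order and shape as the 5-tuples yielded by the pure-Python tokenize module, i.e. (type, string, (start_row, start_col), (end_row, end_col), line). The sketch below is only an illustration of how those format codes map to C arguments; the helper name build_token_info is hypothetical and not CPython API.

#include <Python.h>

/* Illustrative sketch only (not CPython source): builds a tuple with the
 * same shape as the commit's new Py_BuildValue call.
 *
 *   i    -> C int       -> Python int              (token type)
 *   N    -> PyObject *  -> stored directly, the reference is stolen
 *   n    -> Py_ssize_t  -> Python int
 *   (nn) -> nested 2-tuple of Py_ssize_t values    (row, column)
 */
static PyObject *
build_token_info(int type, PyObject *str, Py_ssize_t lineno,
                 Py_ssize_t col_offset, Py_ssize_t end_lineno,
                 Py_ssize_t end_col_offset, PyObject *line)
{
    /* Result: (type, str, (lineno, col_offset),
     *          (end_lineno, end_col_offset), line) */
    return Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset,
                         end_lineno, end_col_offset, line);
}

Compared with the removed "(NinnnnN)" call, the token type now comes first, the string second, and the four offsets are grouped into two (row, column) pairs, so the C tokenizer's output can be unpacked like a tokenize.TokenInfo tuple.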