author     Marta Gómez Macías <mgmacias@google.com>   2023-05-22 00:29:04 (GMT)
committer  GitHub <noreply@github.com>                2023-05-22 00:29:04 (GMT)
commit     8817886ae571f5b5ce4e2e6cfd2458622d0efac1 (patch)
tree       261c32bcaf57a4f76a5c4fac5bd856803d636da8 /Python
parent     4b107d86f38f6778562d4fe5e1d881b52c9d9d6c (diff)
gh-102856: Tokenize performance improvement (#104731)
Diffstat (limited to 'Python')
-rw-r--r--  Python/Python-tokenize.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index ece2386..43b44be 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -207,7 +207,22 @@ tokenizeriter_next(tokenizeriterobject *it)
         end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start);
     }
 
-    result = Py_BuildValue("(NinnnnN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
+    if (it->tok->tok_extra_tokens) {
+        // Necessary adjustments to match the original Python tokenize
+        // implementation
+        if (type > DEDENT && type < OP) {
+            type = OP;
+        }
+        else if (type == ASYNC || type == AWAIT) {
+            type = NAME;
+        }
+        else if (type == NEWLINE) {
+            str = PyUnicode_FromString("\n");
+            end_col_offset++;
+        }
+    }
+
+    result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
     return result;
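
Note (not part of the commit): the new Py_BuildValue format string "(iN(nn)(nn)N)" packs the token fields in the same order and shape as the 5-tuples yielded by the pure-Python tokenize module, i.e. (type, string, (start_row, start_col), (end_row, end_col), line). The sketch below is only an illustration of how those format codes map to C arguments; the helper name build_token_info is hypothetical and not CPython API.

#include <Python.h>

/* Illustrative sketch only (not CPython source): builds a tuple with the
 * same shape as the commit's new Py_BuildValue call.
 *
 *   i    -> C int       -> Python int              (token type)
 *   N    -> PyObject *  -> stored directly, the reference is stolen
 *   n    -> Py_ssize_t  -> Python int
 *   (nn) -> nested 2-tuple of Py_ssize_t values    (row, column)
 */
static PyObject *
build_token_info(int type, PyObject *str, Py_ssize_t lineno,
                 Py_ssize_t col_offset, Py_ssize_t end_lineno,
                 Py_ssize_t end_col_offset, PyObject *line)
{
    /* Result: (type, str, (lineno, col_offset),
     *          (end_lineno, end_col_offset), line) */
    return Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset,
                         end_lineno, end_col_offset, line);
}

Compared with the removed "(NinnnnN)" call, the token type now comes first, the string second, and the four offsets are grouped into two (row, column) pairs, so the C tokenizer's output can be unpacked like a tokenize.TokenInfo tuple.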