gh-102856: Tokenize performance improvement (#104731)

author: Marta Gómez Macías <mgmacias@google.com> 2023-05-22 00:29:04 (GMT)
committer: GitHub <noreply@github.com> 2023-05-22 00:29:04 (GMT)
commit: 8817886ae571f5b5ce4e2e6cfd2458622d0efac1 (patch)
tree: 261c32bcaf57a4f76a5c4fac5bd856803d636da8 /Lib/tokenize.py
parent: 4b107d86f38f6778562d4fe5e1d881b52c9d9d6c (diff)
download: cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.zip cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.tar.gz cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.tar.bz2
1 file changed, 1 insertion, 12 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index cef2773..911f0f1 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -449,16 +449,6 @@ def _tokenize(rl_gen, encoding):
     source = b"".join(rl_gen).decode(encoding)
     token = None
     for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
-        # TODO: Marta -> limpiar esto
-        if 6 < token.type <= 54:
-            token = token._replace(type=OP)
-        if token.type in {ASYNC, AWAIT}:
-            token = token._replace(type=NAME)
-        if token.type == NEWLINE:
-            l_start, c_start = token.start
-            l_end, c_end = token.end
-            token = token._replace(string='\n', start=(l_start, c_start), end=(l_end, c_end+1))
-
         yield token
     if token is not None:
         last_line, _ = token.start
@@ -550,8 +540,7 @@ def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
     """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
     import _tokenize as c_tokenizer
     for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
-        tok, type, lineno, end_lineno, col_off, end_col_off, line = info
-        yield TokenInfo(type, tok, (lineno, col_off), (end_lineno, end_col_off), line)
+        yield TokenInfo._make(info)
 
 
 if __name__ == "__main__":
author	Marta Gómez Macías <mgmacias@google.com>	2023-05-22 00:29:04 (GMT)
committer	GitHub <noreply@github.com>	2023-05-22 00:29:04 (GMT)
commit	8817886ae571f5b5ce4e2e6cfd2458622d0efac1 (patch)
tree	261c32bcaf57a4f76a5c4fac5bd856803d636da8 /Lib/tokenize.py
parent	4b107d86f38f6778562d4fe5e1d881b52c9d9d6c (diff)
download	cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.zip cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.tar.gz cpython-8817886ae571f5b5ce4e2e6cfd2458622d0efac1.tar.bz2