summary refs log tree commit diff stats
path: root/Lib/tokenize.py
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2023-06-07 11:38:36 (GMT)
committer: GitHub <noreply@github.com> 2023-06-07 11:38:36 (GMT)
commit: c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e (patch)
tree: e8f0ccd59d0f309720d7312e594e64703317c560 /Lib/tokenize.py
parent: c607551baf62d7201b147f20095160eee0140684 (diff)
download: cpython-c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e.zip
cpython-c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e.tar.gz
cpython-c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e.tar.bz2
[3.12] gh-105390: Correctly raise TokenError instead of SyntaxError for tokenize errors (GH-105399) (#105439)
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- Lib/tokenize.py 20
1 files changed, 18 insertions, 2 deletions
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index a07a8bf..49e8144 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -517,14 +517,30 @@ def main():
         perror("unexpected error: %s" % err)
         raise
 
+def _transform_msg(msg):
+    """Transform error messages from the C tokenizer into the Python tokenize
+
+    The C tokenizer is more picky than the Python one, so we need to massage
+    the error messages a bit for backwards compatibility.
+    """
+    if "unterminated triple-quoted string literal" in msg:
+        return "EOF in multi-line string"
+    return msg
+
 def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False):
     """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
     if encoding is None:
         it = _tokenize.TokenizerIter(source, extra_tokens=extra_tokens)
     else:
         it = _tokenize.TokenizerIter(source, encoding=encoding, extra_tokens=extra_tokens)
-    for info in it:
-        yield TokenInfo._make(info)
+    try:
+        for info in it:
+            yield TokenInfo._make(info)
+    except SyntaxError as e:
+        if type(e) != SyntaxError:
+            raise e from None
+        msg = _transform_msg(e.msg)
+        raise TokenError(msg, (e.lineno, e.offset)) from None
 
 
 if __name__ == "__main__":