diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2023-06-07 11:38:36 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-07 11:38:36 (GMT) |
commit | c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e (patch) | |
tree | e8f0ccd59d0f309720d7312e594e64703317c560 /Lib/tokenize.py | |
parent | c607551baf62d7201b147f20095160eee0140684 (diff) | |
download | cpython-c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e.zip cpython-c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e.tar.gz cpython-c84d4d165dd92e41d6d8661e71b9fd9ac03bfd9e.tar.bz2 |
[3.12] gh-105390: Correctly raise TokenError instead of SyntaxError for tokenize errors (GH-105399) (#105439)
Diffstat (limited to 'Lib/tokenize.py')
-rw-r--r-- | Lib/tokenize.py | 20 |
1 files changed, 18 insertions, 2 deletions
```diff
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index a07a8bf..49e8144 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -517,14 +517,30 @@ def main():
         perror("unexpected error: %s" % err)
         raise
 
+def _transform_msg(msg):
+    """Transform error messages from the C tokenizer into the Python tokenize
+
+    The C tokenizer is more picky than the Python one, so we need to massage
+    the error messages a bit for backwards compatibility.
+    """
+    if "unterminated triple-quoted string literal" in msg:
+        return "EOF in multi-line string"
+    return msg
+
 def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False):
     """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
     if encoding is None:
         it = _tokenize.TokenizerIter(source, extra_tokens=extra_tokens)
     else:
         it = _tokenize.TokenizerIter(source, encoding=encoding, extra_tokens=extra_tokens)
-    for info in it:
-        yield TokenInfo._make(info)
+    try:
+        for info in it:
+            yield TokenInfo._make(info)
+    except SyntaxError as e:
+        if type(e) != SyntaxError:
+            raise e from None
+        msg = _transform_msg(e.msg)
+        raise TokenError(msg, (e.lineno, e.offset)) from None
 
 
 if __name__ == "__main__":
```