diff options
author | Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> | 2021-12-12 16:52:49 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-12 16:52:49 (GMT) |
commit | 94483f1e3cec182fabe19268e579f63045bc984a (patch) | |
tree | dd22d834f0c0fb57fc167a4b7d83d0777aaf4ca2 /Parser/tokenizer.c | |
parent | 438817fdd5b731d486285d205bed2e78b655c0d6 (diff) | |
download | cpython-94483f1e3cec182fabe19268e579f63045bc984a.zip cpython-94483f1e3cec182fabe19268e579f63045bc984a.tar.gz cpython-94483f1e3cec182fabe19268e579f63045bc984a.tar.bz2 |
bpo-46054: Fix parsing error when parsing non-utf8 characters in source files (GH-30068) (GH-30069)
(cherry picked from commit 4325a766f5f603ef6dfb8c4d5798e5e73cb5efd5)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r-- | Parser/tokenizer.c | 13 |
1 file changed, 5 insertions, 8 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 672fdb9..8e9c69d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -818,10 +818,10 @@ tok_readline_raw(struct tok_state *tok)
             tok_concatenate_interactive_new_line(tok, line) == -1) {
             return 0;
         }
-        if (*tok->inp == '\0') {
+        tok->inp = strchr(tok->inp, '\0');
+        if (tok->inp == tok->buf) {
             return 0;
         }
-        tok->inp = strchr(tok->inp, '\0');
     } while (tok->inp[-1] != '\n');
     return 1;
 }
@@ -983,12 +983,9 @@ tok_underflow_file(struct tok_state *tok) {
     }
     /* The default encoding is UTF-8, so make sure we don't have any
        non-UTF-8 sequences in it. */
-    if (!tok->encoding
-        && (tok->decoding_state != STATE_NORMAL || tok->lineno >= 2)) {
-        if (!ensure_utf8(tok->cur, tok)) {
-            error_ret(tok);
-            return 0;
-        }
+    if (!tok->encoding && !ensure_utf8(tok->cur, tok)) {
+        error_ret(tok);
+        return 0;
     }
     assert(tok->done == E_OK);
     return tok->done == E_OK;