diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2021-06-08 23:54:29 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-08 23:54:29 (GMT) |
commit | 9fd21f649d66dcb10108ee395fd68ed32c8239cd (patch) | |
tree | f04ca9dce4d4cad34d7833252326cd195031283a /Parser/pegen.c | |
parent | 2ea6d890281c415e0a2f00e63526e592da8ce3d9 (diff) | |
download | cpython-9fd21f649d66dcb10108ee395fd68ed32c8239cd.zip cpython-9fd21f649d66dcb10108ee395fd68ed32c8239cd.tar.gz cpython-9fd21f649d66dcb10108ee395fd68ed32c8239cd.tar.bz2 |
bpo-44349: Fix edge case when displaying text from files with encoding in syntax errors (GH-26611)
Diffstat (limited to 'Parser/pegen.c')
-rw-r--r-- | Parser/pegen.c | 7 |
1 files changed, 5 insertions, 2 deletions
```diff
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 42a9922..e651819 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -456,10 +456,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         goto error;
     }
 
+    // PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
+    // with an arbitrary encoding or otherwise we could get some badly decoded text.
+    int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
     if (p->tok->fp_interactive) {
         error_line = get_error_line(p, lineno);
     }
-    else if (p->start_rule == Py_file_input) {
+    else if (uses_utf8_codec && p->start_rule == Py_file_input) {
         error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
     }
 
@@ -471,7 +474,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
            we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
            `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
            does not physically exist */
-        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
 
         if (p->tok->lineno <= lineno) {
             Py_ssize_t size = p->tok->inp - p->tok->buf;
```