diff options
author | Pablo Galindo Salgado <Pablogsal@gmail.com> | 2021-11-20 14:36:07 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-20 14:36:07 (GMT) |
commit | fdcc46d9554094994f78bedf6dc9220e5d5ee668 (patch) | |
tree | 4f8dbf3fc78bf76311d63c84a5c2384c9fd089d9 /Parser/pegen.c | |
parent | 6d430ef5ab62158a200b94dff31b89524a9576bb (diff) | |
download | cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.zip cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.tar.gz cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.tar.bz2 |
bpo-45848: Allow the parser to get error lines from encoded files (GH-29646)
Diffstat (limited to 'Parser/pegen.c')
-rw-r--r-- | Parser/pegen.c | 15 |
1 file changed, 8 insertions, 7 deletions
```diff
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 09c1a19..b3fdae4 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -482,14 +482,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         goto error;
     }
 
-    // PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
-    // with an arbitrary encoding or otherwise we could get some badly decoded text.
-    int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
     if (p->tok->fp_interactive) {
         error_line = get_error_line(p, lineno);
     }
-    else if (uses_utf8_codec && p->start_rule == Py_file_input) {
-        error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
+    else if (p->start_rule == Py_file_input) {
+        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
+                                                     (int) lineno, p->tok->encoding);
     }
 
     if (!error_line) {
@@ -500,15 +498,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
            we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
            `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
            does not physically exist */
-        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
 
         if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
             Py_ssize_t size = p->tok->inp - p->tok->buf;
             error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
         }
-        else {
+        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
             error_line = get_error_line(p, lineno);
         }
+        else {
+            error_line = PyUnicode_FromStringAndSize("", 0);
+        }
         if (!error_line) {
             goto error;
         }
```