bpo-45848: Allow the parser to get error lines from encoded files (GH-29646)

author: Pablo Galindo Salgado <Pablogsal@gmail.com> 2021-11-20 14:36:07 (GMT)
committer: GitHub <noreply@github.com> 2021-11-20 14:36:07 (GMT)
commit: fdcc46d9554094994f78bedf6dc9220e5d5ee668 (patch)
tree: 4f8dbf3fc78bf76311d63c84a5c2384c9fd089d9 /Parser/pegen.c
parent: 6d430ef5ab62158a200b94dff31b89524a9576bb (diff)
download: cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.zip cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.tar.gz cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.tar.bz2
1 file changed, 8 insertions, 7 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 09c1a19..b3fdae4 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -482,14 +482,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
         goto error;
     }
 
-    // PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
-    // with an arbitrary encoding or otherwise we could get some badly decoded text.
-    int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
     if (p->tok->fp_interactive) {
         error_line = get_error_line(p, lineno);
     }
-    else if (uses_utf8_codec && p->start_rule == Py_file_input) {
-        error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
+    else if (p->start_rule == Py_file_input) {
+        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
+                                                     (int) lineno, p->tok->encoding);
     }
 
     if (!error_line) {
@@ -500,15 +498,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
            we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
            `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
            does not physically exist */
-        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
+        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
 
         if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
             Py_ssize_t size = p->tok->inp - p->tok->buf;
             error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
         }
-        else {
+        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
             error_line = get_error_line(p, lineno);
         }
+        else {
+            error_line = PyUnicode_FromStringAndSize("", 0);
+        }
         if (!error_line) {
             goto error;
         }
author	Pablo Galindo Salgado <Pablogsal@gmail.com>	2021-11-20 14:36:07 (GMT)
committer	GitHub <noreply@github.com>	2021-11-20 14:36:07 (GMT)
commit	fdcc46d9554094994f78bedf6dc9220e5d5ee668 (patch)
tree	4f8dbf3fc78bf76311d63c84a5c2384c9fd089d9 /Parser/pegen.c
parent	6d430ef5ab62158a200b94dff31b89524a9576bb (diff)
download	cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.zip cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.tar.gz cpython-fdcc46d9554094994f78bedf6dc9220e5d5ee668.tar.bz2