summary | refs | log | tree | commit | diff | stats
path: root/Parser/pegen.c
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2021-06-09 00:29:21 (GMT)
committer: GitHub <noreply@github.com> 2021-06-09 00:29:21 (GMT)
commit: c0496093e54edb78d2bd09b083b73e1e5b9e7242 (patch)
tree: f9f30b61fd77edb18e2030c32bbf5dabf0754035 /Parser/pegen.c
parent: eeefa7f6c0cc64bc74c3b96a0ebbff1a2b9d3199 (diff)
download: cpython-c0496093e54edb78d2bd09b083b73e1e5b9e7242.zip
cpython-c0496093e54edb78d2bd09b083b73e1e5b9e7242.tar.gz
cpython-c0496093e54edb78d2bd09b083b73e1e5b9e7242.tar.bz2
bpo-44349: Fix edge case when displaying text from files with encoding in syntax errors (GH-26611) (GH-26616)
(cherry picked from commit 9fd21f649d66dcb10108ee395fd68ed32c8239cd) Co-authored-by: Pablo Galindo <Pablogsal@gmail.com> Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
Diffstat (limited to 'Parser/pegen.c')
-rw-r--r-- Parser/pegen.c 7
1 files changed, 5 insertions, 2 deletions
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 42a9922..e651819 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -456,10 +456,13 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
goto error;
}
+ // PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
+ // with an arbitrary encoding or otherwise we could get some badly decoded text.
+ int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
if (p->tok->fp_interactive) {
error_line = get_error_line(p, lineno);
}
- else if (p->start_rule == Py_file_input) {
+ else if (uses_utf8_codec && p->start_rule == Py_file_input) {
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
}
@@ -471,7 +474,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
does not physically exist */
- assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
+ assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
if (p->tok->lineno <= lineno) {
Py_ssize_t size = p->tok->inp - p->tok->buf;