From 123ff266cda9ad279106f20dca06ba114f6a9b8a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 22 Mar 2021 16:24:39 +0000 Subject: bpo-43591: Fix error location in interactive mode for errors at the end of the line (GH-24973) Co-authored-by: Erlend Egeberg Aasland --- Lib/test/test_cmd_line.py | 10 ++++++++-- Parser/pegen.c | 14 +++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index e87eede..25d3eec 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -851,13 +851,19 @@ class IgnoreEnvironmentTest(unittest.TestCase): ) class SyntaxErrorTests(unittest.TestCase): - def test_tokenizer_error_with_stdin(self): - proc = subprocess.run([sys.executable, "-"], input = b"(1+2+3", + def check_string(self, code): + proc = subprocess.run([sys.executable, "-"], input=code, stdout=subprocess.PIPE, stderr=subprocess.PIPE) self.assertNotEqual(proc.returncode, 0) self.assertNotEqual(proc.stderr, None) self.assertIn(b"\nSyntaxError", proc.stderr) + def test_tokenizer_error_with_stdin(self): + self.check_string(b"(1+2+3") + + def test_decoding_error_at_the_end_of_the_line(self): + self.check_string(b"'\u1f'") + def test_main(): support.run_unittest(CmdLineTest, IgnoreEnvironmentTest, SyntaxErrorTests) support.reap_children() diff --git a/Parser/pegen.c b/Parser/pegen.c index 24aa3af..953480d 100644 --- a/Parser/pegen.c +++ b/Parser/pegen.c @@ -147,7 +147,11 @@ byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset) if (!str) { return 0; } - assert(col_offset >= 0 && (unsigned long)col_offset <= strlen(str)); + Py_ssize_t len = strlen(str); + if (col_offset > len) { + col_offset = len; + } + assert(col_offset >= 0); PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace"); if (!text) { return 0; @@ -392,10 +396,10 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...) static PyObject * get_error_line(Parser *p, Py_ssize_t lineno) { - /* If p->tok->fp == NULL, then we're parsing from a string, which means that - the whole source is stored in p->tok->str. If not, then we're parsing - from the REPL, so the source lines of the current (multi-line) statement - are stored in p->tok->stdin_content */ + /* If the file descriptor is interactive, the source lines of the current + * (multi-line) statement are stored in p->tok->interactive_src_start. + * If not, we're parsing from a string, which means that the whole source + * is stored in p->tok->str. */ assert(p->tok->fp == NULL || p->tok->fp == stdin); char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str; -- cgit v0.12