summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/test/test_cmd_line.py 10
-rw-r--r-- Parser/pegen.c 14
2 files changed, 17 insertions, 7 deletions
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index e87eede..25d3eec 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -851,13 +851,19 @@ class IgnoreEnvironmentTest(unittest.TestCase):
)
class SyntaxErrorTests(unittest.TestCase):
- def test_tokenizer_error_with_stdin(self):
- proc = subprocess.run([sys.executable, "-"], input = b"(1+2+3",
+ def check_string(self, code):
+ proc = subprocess.run([sys.executable, "-"], input=code,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
self.assertNotEqual(proc.returncode, 0)
self.assertNotEqual(proc.stderr, None)
self.assertIn(b"\nSyntaxError", proc.stderr)
+ def test_tokenizer_error_with_stdin(self):
+ self.check_string(b"(1+2+3")
+
+ def test_decoding_error_at_the_end_of_the_line(self):
+ self.check_string(b"'\u1f'")
+
def test_main():
support.run_unittest(CmdLineTest, IgnoreEnvironmentTest, SyntaxErrorTests)
support.reap_children()
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 24aa3af..953480d 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -147,7 +147,11 @@ byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
if (!str) {
return 0;
}
- assert(col_offset >= 0 && (unsigned long)col_offset <= strlen(str));
+ Py_ssize_t len = strlen(str);
+ if (col_offset > len) {
+ col_offset = len;
+ }
+ assert(col_offset >= 0);
PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
if (!text) {
return 0;
@@ -392,10 +396,10 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
static PyObject *
get_error_line(Parser *p, Py_ssize_t lineno)
{
- /* If p->tok->fp == NULL, then we're parsing from a string, which means that
- the whole source is stored in p->tok->str. If not, then we're parsing
- from the REPL, so the source lines of the current (multi-line) statement
- are stored in p->tok->stdin_content */
+ /* If the file descriptor is interactive, the source lines of the current
+ * (multi-line) statement are stored in p->tok->interactive_src_start.
+ * If not, we're parsing from a string, which means that the whole source
+ * is stored in p->tok->str. */
assert(p->tok->fp == NULL || p->tok->fp == stdin);
char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;